From 5b5705fe906670b5533fa9f6bd3759695887aeea Mon Sep 17 00:00:00 2001 From: Thomas Keller Date: Wed, 23 Mar 2011 00:26:26 +0100 Subject: [PATCH] Add a unit test for monotone's basicio parser and compiler. Also note a couple of quirks and be less strict (for now) when it comes to hash parsing and stanza lines without values. --- src/IDF/Scm/Monotone/BasicIO.php | 102 +++++++++-------- test/IDF/Scm/Monotone/BasicIOTest.php | 155 ++++++++++++++++++++++++++ 2 files changed, 212 insertions(+), 45 deletions(-) create mode 100644 test/IDF/Scm/Monotone/BasicIOTest.php diff --git a/src/IDF/Scm/Monotone/BasicIO.php b/src/IDF/Scm/Monotone/BasicIO.php index e783a90..dfac789 100644 --- a/src/IDF/Scm/Monotone/BasicIO.php +++ b/src/IDF/Scm/Monotone/BasicIO.php @@ -21,6 +21,8 @@ # # ***** END LICENSE BLOCK ***** */ +require_once 'IDF/Scm/Exception.php'; + /** * Utility class to parse and compile basic_io stanzas * @@ -31,6 +33,11 @@ class IDF_Scm_Monotone_BasicIO /** * Parses monotone's basic_io format * + * Known quirks: + * - does not handle multi-values starting with a hash '[]' (no known output) + * - does not validate hashes (should be /[0-9a-f]{40}/i) + * - does not handle forbidden \0 + * * @param string $in * @return array of arrays */ @@ -54,50 +61,56 @@ class IDF_Scm_Monotone_BasicIO $stanzaLine['key'] .= $ch; } - // symbol w/o a value list - if ($pos >= $length || $in[$pos] == "\n") break; - - if ($in[$pos] == '[') { + // ensure we don't look at a symbol w/o a value list + if ($pos >= $length || $in[$pos] == "\n") { unset($stanzaLine['values']); - ++$pos; // opening square bracket - $stanzaLine['hash'] = substr($in, $pos, 40); - $pos += 40; - ++$pos; // closing square bracket - } - else - { unset($stanzaLine['hash']); - $valCount = 0; - // if hashs and plain values are encountered in the same - // value list, we add the hash values as simple values as well - while ($in[$pos] == '"' || $in[$pos] == '[') { - $isHashValue = $in[$pos] == '['; - ++$pos; // opening quote / 
bracket - $stanzaLine['values'][$valCount] = ''; - while ($pos < $length) { - $ch = $in[$pos]; $pr = $in[$pos-1]; - if (($isHashValue && $ch == ']') - ||(!$isHashValue && $ch == '"' && $pr != '\\')) - break; + } + else { + if ($in[$pos] == '[') { + unset($stanzaLine['values']); + ++$pos; // opening square bracket + while ($pos < $length && $in[$pos] != ']') { + $stanzaLine['hash'] .= $in[$pos]; ++$pos; - $stanzaLine['values'][$valCount] .= $ch; } - ++$pos; // closing quote + ++$pos; // closing square bracket + } + else + { + unset($stanzaLine['hash']); + $valCount = 0; + // if hashs and plain values are encountered in the same + // value list, we add the hash values as simple values as well + while ($in[$pos] == '"' || $in[$pos] == '[') { + $isHashValue = $in[$pos] == '['; + ++$pos; // opening quote / bracket + $stanzaLine['values'][$valCount] = ''; + while ($pos < $length) { + $ch = $in[$pos]; $pr = $in[$pos-1]; + if (($isHashValue && $ch == ']') + ||(!$isHashValue && $ch == '"' && $pr != '\\')) + break; + ++$pos; + $stanzaLine['values'][$valCount] .= $ch; + } + ++$pos; // closing quote - if (!$isHashValue) { - $stanzaLine['values'][$valCount] = str_replace( - array("\\\\", "\\\""), - array("\\", "\""), - $stanzaLine['values'][$valCount] - ); - } + if (!$isHashValue) { + $stanzaLine['values'][$valCount] = str_replace( + array("\\\\", "\\\""), + array("\\", "\""), + $stanzaLine['values'][$valCount] + ); + } - if ($pos >= $length) - break; + if ($pos >= $length) + break; - if ($in[$pos] == ' ') { - ++$pos; // space - ++$valCount; + if ($in[$pos] == ' ') { + ++$pos; // space + ++$valCount; + } } } } @@ -114,6 +127,12 @@ class IDF_Scm_Monotone_BasicIO /** * Compiles monotone's basicio format * + * Known quirks: + * - does not validate keys for /[a-z_]+/ + * - does not validate hashes (should be /[0-9a-f]{40}/i) + * - does not support intermixed value / hash formats + * - does not handle forbidden \0 + * * @param array $in Array of arrays * @return string */ @@ -129,7 
+148,7 @@ class IDF_Scm_Monotone_BasicIO $maxkeylength = 0; foreach ((array)$stanza as $lx => $line) { - if (!array_key_exists('key', $line)) { + if (!array_key_exists('key', $line) || empty($line['key'])) { throw new IDF_Scm_Exception( '"key" not found in basicio stanza '.$sx.', line '.$lx ); @@ -157,13 +176,6 @@ class IDF_Scm_Monotone_BasicIO $value).'"'; } } - else - { - throw new IDF_Scm_Exception( - 'neither "hash" nor "values" found in basicio '. - 'stanza '.$sx.', line '.$lx - ); - } $out .= "\n"; } diff --git a/test/IDF/Scm/Monotone/BasicIOTest.php b/test/IDF/Scm/Monotone/BasicIOTest.php new file mode 100644 index 0000000..0ba792a --- /dev/null +++ b/test/IDF/Scm/Monotone/BasicIOTest.php @@ -0,0 +1,155 @@ +assertTrue(is_array($stanzas) && count($stanzas) == 0); + + // single stanza, single line, only key + $stanzas = IDF_Scm_Monotone_BasicIO::parse('foo'); + $this->assertEquals(1, count($stanzas)); + $stanza = $stanzas[0]; + $this->assertEquals(1, count($stanza)); + $entry = $stanza[0]; + $this->assertEquals('foo', $entry['key']); + $this->assertTrue(!array_key_exists('hash', $entry)); + $this->assertTrue(!array_key_exists('values', $entry)); + + // single stanza, single line, key with hash + $stanzas = IDF_Scm_Monotone_BasicIO::parse("foo [0123456789012345678901234567890123456789]"); + $this->assertEquals(1, count($stanzas)); + $stanza = $stanzas[0]; + $this->assertEquals(1, count($stanza)); + $entry = $stanza[0]; + $this->assertEquals('foo', $entry['key']); + $this->assertEquals("0123456789012345678901234567890123456789", $entry['hash']); + $this->assertTrue(!array_key_exists('values', $entry)); + + // single stanza, single line, key with two values + $stanzas = IDF_Scm_Monotone_BasicIO::parse("foo \"bar\n\nbaz\" \"bla\""); + $this->assertEquals(1, count($stanzas)); + $stanza = $stanzas[0]; + $this->assertEquals(1, count($stanza)); + $entry = $stanza[0]; + $this->assertEquals('foo', $entry['key']); + $this->assertTrue(!array_key_exists('hash', $entry)); + 
$this->assertEquals(array("bar\n\nbaz", "bla"), $entry['values']); + + // single stanza, single line, key with a value and a hash + $stanzas = IDF_Scm_Monotone_BasicIO::parse("foo \"bar\n\nbaz\" [0123456789012345678901234567890123456789]"); + $this->assertEquals(1, count($stanzas)); + $stanza = $stanzas[0]; + $this->assertEquals(1, count($stanza)); + $entry = $stanza[0]; + $this->assertEquals('foo', $entry['key']); + $this->assertTrue(!array_key_exists('hash', $entry)); + $this->assertEquals(array("bar\n\nbaz", "0123456789012345678901234567890123456789"), $entry['values']); + + // single stanza, two lines, keys with single value / hash + $stanzas = IDF_Scm_Monotone_BasicIO::parse("foo \"bar\"\nbaz [0123456789012345678901234567890123456789]"); + $this->assertEquals(1, count($stanzas)); + $stanza = $stanzas[0]; + $this->assertEquals(2, count($stanza)); + $entry = $stanza[0]; + $this->assertEquals('foo', $entry['key']); + $this->assertTrue(!array_key_exists('hash', $entry)); + $this->assertEquals(array("bar"), $entry['values']); + $entry = $stanza[1]; + $this->assertEquals('baz', $entry['key']); + $this->assertTrue(!array_key_exists('values', $entry)); + $this->assertEquals("0123456789012345678901234567890123456789", $entry['hash']); + + // two stanza, one two liner, one one liner + $stanzas = IDF_Scm_Monotone_BasicIO::parse("foo \"bar\"\nbaz [0123456789012345678901234567890123456789]\n\nbla \"blub\""); + $this->assertEquals(2, count($stanzas)); + $stanza = $stanzas[0]; + $this->assertEquals(2, count($stanza)); + $entry = $stanza[0]; + $this->assertEquals('foo', $entry['key']); + $this->assertTrue(!array_key_exists('hash', $entry)); + $this->assertEquals(array("bar"), $entry['values']); + $entry = $stanza[1]; + $this->assertEquals('baz', $entry['key']); + $this->assertTrue(!array_key_exists('values', $entry)); + $this->assertEquals("0123456789012345678901234567890123456789", $entry['hash']); + $stanza = $stanzas[1]; + $this->assertEquals(1, count($stanza)); + $entry = 
$stanza[0]; + $this->assertEquals('bla', $entry['key']); + $this->assertTrue(!array_key_exists('hash', $entry)); + $this->assertEquals(array("blub"), $entry['values']); + + // (un)escaping tests + $stanzas = IDF_Scm_Monotone_BasicIO::parse('foo "bar\\baz" "bla\"blub"'); + $this->assertEquals(1, count($stanzas)); + $stanza = $stanzas[0]; + $this->assertEquals(1, count($stanza)); + $entry = $stanza[0]; + $this->assertEquals('foo', $entry['key']); + $this->assertTrue(!array_key_exists('hash', $entry)); + $this->assertEquals(array('bar\baz', 'bla"blub'), $entry['values']); + + } + + public function testCompile() + { + $stanzas = array( + array( + array('key' => 'foo'), + array('key' => 'bar', 'values' => array('one', "two\nthree")), + ), + array( + array('key' => 'baz', 'hash' => '0123456789012345678901234567890123456789'), + array('key' => 'blablub', 'values' => array('one"two', 'three\four')), + ), + ); + + $ex =<<assertEquals($ex, IDF_Scm_Monotone_BasicIO::compile($stanzas)); + + // keys must not be null + $stanzas = array( + array( + array('key' => null, 'values' => array('foo')), + ), + ); + + $thrown = false; + try { + IDF_Scm_Monotone_BasicIO::compile($stanzas); + } catch (IDF_Scm_Exception $e) { + $this->assertRegExp('/^"key" not found in basicio stanza/', $e->getMessage()); + $thrown = true; + } + $this->assertTrue($thrown); + + // ...nor completely non-existing + $stanzas = array( + array( + array('values' => array('foo')), + ), + ); + + $thrown = false; + try { + IDF_Scm_Monotone_BasicIO::compile($stanzas); + } catch (IDF_Scm_Exception $e) { + $this->assertRegExp('/^"key" not found in basicio stanza/', $e->getMessage()); + $thrown = true; + } + $this->assertTrue($thrown); + } +} +