adding code
This commit is contained in:
parent
c40ff89fbe
commit
39cf9ae84e
21
Makefile
Normal file
21
Makefile
Normal file
@ -0,0 +1,21 @@
|
||||
# Build configuration for the csvpp demo executable and its test binary.
CC=g++
# -c: compile only; headers live in ./include.
CFLAGS=-c -Wall -I./include
# Kept as-is for backward compatibility with command-line overrides; the
# include path is harmless on the link line.
LDFLAGS=-I./include
SOURCES=main.cpp ./src/csvpp.cpp ./src/stringhelper.cpp
TEST=tests.cpp ./src/csvpp.cpp ./src/stringhelper.cpp
OBJECTS=$(SOURCES:.cpp=.o)
TEST_OBJECTS=$(TEST:.cpp=.o)
EXECUTABLE=csvpp

# These targets do not name files they produce.
.PHONY: all clean tests

all: $(EXECUTABLE)

$(EXECUTABLE): $(OBJECTS)
	$(CC) $(LDFLAGS) $(OBJECTS) -o $@

# Suffix rule: build foo.o from foo.cpp.
.cpp.o:
	$(CC) $(CFLAGS) $< -o $@

# Remove exactly what this Makefile builds (objects next to their sources,
# the demo executable and the test binary).
clean:
	rm -f $(OBJECTS) $(TEST_OBJECTS) $(EXECUTABLE) test

# Build the test binary; the object prerequisites make sure the library
# objects exist (and are up to date) before linking.
tests: $(TEST_OBJECTS)
	$(CC) $(LDFLAGS) $(TEST_OBJECTS) -o test
|
64
csv2.py
Normal file
64
csv2.py
Normal file
@ -0,0 +1,64 @@
|
||||
|
||||
class RowReader(dict):
    """Dictionary that fills itself from CSV lines via ``line >> reader``.

    The first non-blank line fed to the reader becomes the header (unless
    ``skipheader`` is true); every later line replaces the dictionary
    contents with that row's values, keyed by header name. When
    ``skipheader`` is true, or a row has more fields than the header, the
    zero-based column index (as a string) is used as the key.

    Quoted fields keep their surrounding double quotes, may contain commas,
    and a quote preceded by a backslash is treated as a literal character.
    """

    # Class-level fallbacks; __init__ gives every instance its own values.
    header = []
    skipheader = False

    def __init__(self, skipheader=False, newline="\n", *arg, **kw):
        """Create a reader.

        skipheader -- when true, no header line is expected and columns are
                      keyed "0", "1", ...
        newline    -- line terminator, stored for callers; parsing itself
                      works on already-split lines.
        Remaining arguments are forwarded to ``dict``.
        """
        super(RowReader, self).__init__(*arg, **kw)
        self.header = []
        self.skipheader = skipheader
        self.newline = newline

    def _key(self, index):
        """Return the dictionary key for the column at position ``index``."""
        if not self.skipheader and index < len(self.header):
            return self.header[index]
        # No header name available for this column: fall back to its index.
        return str(index)

    def __rrshift__(self, stream):
        """Parse one CSV line (``stream``) into this dictionary.

        Invoked for ``line >> reader`` because ``str`` does not implement
        ``>>``. Returns ``stream`` unchanged.
        """
        line = stream.strip()
        if not line:
            # Blank lines carry no data; leave the current state untouched.
            return stream

        if len(self.header) == 0 and not self.skipheader:
            self.header = line.split(",")
            return stream

        # Start from a clean row so values of a longer previous row do not
        # leak into this one.
        self.clear()

        field = ""
        column = 0
        in_quotes = False
        length = len(line)
        i = 0
        while i < length:
            c = line[i]
            if c == ",":
                if in_quotes:
                    # A comma inside quotes is data, not a separator.
                    field += c
                else:
                    self[self._key(column)] = field
                    field = ""
                    column += 1
            elif c == '"' and i > 0 and line[i - 1] == "\\":
                # Backslash-escaped quote: literal character.
                field += c
            elif c == '"':
                field += c
                in_quotes = not in_quotes
                if not in_quotes:
                    # Closing quote: drop whitespace that preceded the
                    # opening quote and ignore everything up to the next
                    # separator (or the end of the line).
                    field = field.lstrip()
                    nxt = line.find(",", i)
                    i = length if nxt == -1 else nxt
                    continue
            else:
                field += c
            i += 1

        self[self._key(column)] = field
        return stream
|
||||
|
||||
class RowWriter(list):
    """Plain list subclass intended to collect rows for output.

    It adds no behaviour of its own; construction is delegated to ``list``.
    """

    def __init__(self, *arg, **kw):
        # Forward everything to list's initializer unchanged.
        super(RowWriter, self).__init__(*arg, **kw)
|
||||
|
||||
# Demo driver: feed every line of sampledata.csv to a RowReader and print
# each non-empty row. The with-block guarantees the file is closed, and
# print(...) with a single argument behaves the same on Python 2 and 3.
r = RowReader()
with open("sampledata.csv") as f:
    for l in f:
        l >> r
        if r:
            print(str(r))
|
23
csvtest.py
Normal file
23
csvtest.py
Normal file
@ -0,0 +1,23 @@
|
||||
import csv
|
||||
|
||||
# Reference run with the standard library: print every record of
# sampledata.csv as a mapping keyed by the header row. The with-block
# closes the file even if reading fails part-way through.
with open("sampledata.csv") as ifile:
    reader = csv.DictReader(ifile)
    for r in reader:
        print(r)
|
42
include/csvpp.h
Normal file
42
include/csvpp.h
Normal file
@ -0,0 +1,42 @@
|
||||
#ifndef CSVPP_H
|
||||
#define CSVPP_H
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <sstream>
|
||||
#include <istream>
|
||||
|
||||
|
||||
#define VERSION "2.2"
|
||||
|
||||
namespace csvpp {

    class RowWriter;

    /**
     * One CSV row, exposed as a map from column key to field text.
     *
     * Rows are filled by operator>> (defined in the library source), which
     * also records the header line in this object.
     */
    class RowReader : public std::map<std::string, std::string> {
        private:
            std::vector<std::string> header;   // column names taken from the header line
            bool skipheader;                   // true: input has no header line
            std::string delimiter_char;        // this is a string because the split function helper is expecting a string, but really this is just a char
        public:
            const char * newline;              // line terminator used when writing

            // Adding support for custom delimiter character
            // Based on the patch by Hanifa
            // https://code.google.com/p/csvpp/issues/detail?id=2
            //
            // Initializers follow the declaration order of the members
            // (avoids -Wreorder). An empty delimiter falls back to "," and a
            // null newline falls back to "\n" so the stream operators never
            // index an empty string or print a null pointer.
            RowReader(std::string delimiter_char = ",", bool skipheader=false, const char * newline="\n")
                : skipheader(skipheader),
                  delimiter_char(delimiter_char.empty() ? std::string(",") : delimiter_char),
                  newline(newline ? newline : "\n") { }

            // NOTE: hides std::map::clear(); only the stored header is
            // reset, the row's entries are left untouched.
            void clear() { header.clear(); }

            friend std::istream & operator>>(std::istream & os, RowReader & r);
            friend std::ostream & operator<<(std::ostream & os, const RowWriter & r);
    };

    /**
     * A sequence of rows that can be written to a stream with operator<<.
     */
    class RowWriter : public std::vector<RowReader>
    {
        public:
            friend std::ostream & operator<<(std::ostream & os, const RowWriter & r);
    };

    // Read-only iterator over the (key, value) pairs of a row.
    typedef RowReader::const_iterator rowiterator;
}
|
||||
|
||||
#endif
|
46
include/stringhelper.h
Normal file
46
include/stringhelper.h
Normal file
@ -0,0 +1,46 @@
|
||||
#ifndef STRINGHELPER_H
|
||||
#define STRINGHELPER_H
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <iterator>
|
||||
#include <sstream>
|
||||
//Borrowed from http://www.cplusplus.com/faq/sequences/strings/trim/
|
||||
// Returns a copy of `s` without the trailing run of characters that appear
// in `delimiters` (whitespace by default). The result is empty when `s`
// consists only of such characters.
inline std::string trim_right_copy(
    const std::string& s,
    const std::string& delimiters = " \f\n\r\t\v" )
{
    const std::string::size_type last_kept = s.find_last_not_of( delimiters );
    if ( last_kept == std::string::npos )
    {
        // Nothing but delimiters (or an empty string).
        return std::string();
    }
    return std::string( s.begin(), s.begin() + last_kept + 1 );
}
|
||||
|
||||
// Returns a copy of `s` without the leading run of characters that appear
// in `delimiters` (whitespace by default).
//
// An empty or all-delimiter input yields an empty string. The explicit
// check is required: find_first_not_of returns npos in that case and
// substr(npos) would throw std::out_of_range.
inline std::string trim_left_copy(
    const std::string& s,
    const std::string& delimiters = " \f\n\r\t\v" )
{
    const std::string::size_type first_kept = s.find_first_not_of( delimiters );
    if ( first_kept == std::string::npos )
    {
        return std::string();
    }
    return s.substr( first_kept );
}
|
||||
|
||||
inline std::string trim_copy(
|
||||
const std::string& s,
|
||||
const std::string& delimiters = " \f\n\r\t\v" )
|
||||
{
|
||||
return trim_left_copy( trim_right_copy( s, delimiters ), delimiters );
|
||||
}
|
||||
|
||||
// Pointer to a const std::string member function that takes a search string
// and a start offset and returns a position. split() stores either
// &std::string::find_first_of or &std::string::find in a variable of this
// type to select its search strategy.
typedef std::string::size_type (std::string::*find_t)(const std::string& delim,
|
||||
std::string::size_type offset) const;
|
||||
|
||||
// Splits `s` into tokens and returns them in order.
//   match       - separator: each character of it is a separator on its own,
//                 unless fullMatch is true
//   removeEmpty - when true, empty tokens are left out of the result
//   fullMatch   - when true, the whole `match` string is the separator
std::vector<std::string> split(const std::string& s,
|
||||
const std::string& match,
|
||||
bool removeEmpty=false,
|
||||
bool fullMatch=false);
|
||||
|
||||
//http://stackoverflow.com/a/13636164/195722
|
||||
// Converts `obj` to text by inserting it into a std::ostringstream with
// operator<< and returning the accumulated characters.
template <typename T>
std::string ObjToString ( T obj )
{
    std::ostringstream formatter;
    formatter << obj;
    const std::string text = formatter.str();
    return text;
}
|
||||
|
||||
#endif
|
23
main.cpp
Normal file
23
main.cpp
Normal file
@ -0,0 +1,23 @@
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include "csvpp.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace csvpp;
|
||||
|
||||
int main()
|
||||
{
|
||||
RowReader tmp;
|
||||
stringstream ss;
|
||||
ss << "field1,field2,field3\r\n123,234,345\r\n999,000,111\r\n";
|
||||
ss >> tmp;
|
||||
rowiterator it;
|
||||
while(ss >> tmp)
|
||||
{
|
||||
for(it = tmp.begin(); it != tmp.end(); it++)
|
||||
cout << it->first << " => " << it->second << endl;
|
||||
cout << endl;
|
||||
}
|
||||
return 0;
|
||||
|
||||
}
|
1000
sampledata.csv
Normal file
1000
sampledata.csv
Normal file
File diff suppressed because it is too large
Load Diff
139
src/csvpp.cpp
Normal file
139
src/csvpp.cpp
Normal file
@ -0,0 +1,139 @@
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <istream>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <algorithm>
|
||||
#include "csvpp.h"
|
||||
#include "stringhelper.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace csvpp;
|
||||
|
||||
namespace csvpp {
|
||||
|
||||
std::ostream & operator<<(std::ostream & os, const RowWriter & r)
|
||||
{
|
||||
|
||||
rowiterator it;
|
||||
if (r.size() == 0)
|
||||
return os;
|
||||
|
||||
if (!r[0].skipheader)
|
||||
{
|
||||
for(unsigned int i = 0; i < r[0].header.size() - 1; i++)
|
||||
{
|
||||
os << r[0].header[i] << r[0].delimiter_char;;
|
||||
}
|
||||
os << r[0].header[r[0].header.size() - 1] << r[0].newline;
|
||||
}
|
||||
for(unsigned int i = 0; i < r.size(); i++)
|
||||
{
|
||||
for(it = r[i].begin(); it != r[i].end(); it++)
|
||||
{
|
||||
if (distance(r[i].begin(), it) != (int)(r[i].size() - 1))
|
||||
os << it->second << r[i].delimiter_char;
|
||||
else
|
||||
os << it->second;
|
||||
}
|
||||
if (!r[0].skipheader && i != 0)
|
||||
os << r[i].newline;
|
||||
}
|
||||
|
||||
return os;
|
||||
}
|
||||
|
||||
std::istream & operator>>(std::istream & is, RowReader & r)
|
||||
{
|
||||
string buffer;
|
||||
stringstream buffer2;
|
||||
int currentheader = 0;
|
||||
getline(is, buffer);
|
||||
// Patch by damienlmoore - https://code.google.com/p/csvpp/issues/detail?id=1
|
||||
if(!is.good() || is.eof())
|
||||
{
|
||||
return is;
|
||||
}
|
||||
|
||||
buffer = trim_copy(buffer);
|
||||
char c;
|
||||
bool startquote = false;
|
||||
if(r.header.size() == 0 && !r.skipheader)
|
||||
{
|
||||
|
||||
vector<string> sections = split(buffer, r.delimiter_char);
|
||||
for(unsigned int i = 0; i < sections.size(); i++)
|
||||
r.header.push_back(sections[i]);
|
||||
} else {
|
||||
for(unsigned int i = 0; i < buffer.length(); i++)
|
||||
{
|
||||
c = buffer[i];
|
||||
/*
|
||||
If the current character is a comma then we may have found the start of the next column
|
||||
however we do need to test if we are inside of a quote
|
||||
|
||||
If we aren't inside of a quote - store the value using the current header 'pointer' and keep scanning
|
||||
*/
|
||||
if (c == r.delimiter_char[0])
|
||||
{
|
||||
if (startquote)
|
||||
{
|
||||
buffer2 << c;
|
||||
continue;
|
||||
}
|
||||
if (!r.skipheader)
|
||||
{
|
||||
r[r.header[currentheader]] = buffer2.str();
|
||||
}
|
||||
else
|
||||
{
|
||||
r[ObjToString(currentheader)] = buffer2.str();
|
||||
}
|
||||
buffer2.str(string());
|
||||
currentheader++;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
// If the character is a quote then we need to note this and use that to ignore commas
|
||||
// added logic to ignore whitespace before and after the whitespace
|
||||
if (c == '"')
|
||||
{
|
||||
if (startquote)
|
||||
{
|
||||
buffer2 << '"';
|
||||
buffer2.str(trim_left_copy(buffer2.str()));
|
||||
}
|
||||
if ( (((int)i-1) >= 0 && buffer[i-1] == '\\'))
|
||||
{
|
||||
buffer2 << c;
|
||||
continue;
|
||||
}
|
||||
startquote = !startquote;
|
||||
//find , and move i to it
|
||||
if (!startquote)
|
||||
{
|
||||
for(unsigned int x = i; x < buffer.length(); x++)
|
||||
{
|
||||
if (buffer[x] == r.delimiter_char[0] || x == buffer.length())
|
||||
{
|
||||
i = x-1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
buffer2 << c;
|
||||
}
|
||||
if (!r.skipheader)
|
||||
{
|
||||
r[r.header[currentheader]] = buffer2.str();
|
||||
} else {
|
||||
r[ObjToString(currentheader)] = buffer2.str();
|
||||
}
|
||||
}
|
||||
|
||||
return is;
|
||||
}
|
||||
}
|
49
src/stringhelper.cpp
Normal file
49
src/stringhelper.cpp
Normal file
@ -0,0 +1,49 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "stringhelper.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Splits `s` into tokens and returns them in order of appearance.
//
// With fullMatch false every character of `match` acts as a separator on
// its own; with fullMatch true the whole `match` string is the separator.
// When removeEmpty is true, empty tokens are left out of the result.
std::vector<std::string> split(const std::string& s,
                               const std::string& match,
                               bool removeEmpty,
                               bool fullMatch)
{
    std::vector<std::string> tokens;

    // Number of characters to step over after a separator has been found.
    const std::string::size_type advance = fullMatch ? match.length() : 1;

    std::string::size_type begin = 0;
    while (begin != std::string::npos)
    {
        // Current token is the range [begin..stop).
        std::string::size_type stop = fullMatch ? s.find(match, begin)
                                                : s.find_first_of(match, begin);

        // An empty separator always matches in std::string::find, and
        // stepping by 0 would never terminate: treat it as "no separator"
        // and take the remainder of the string as a single token.
        if (advance == 0)
            stop = std::string::npos;

        const std::string piece = s.substr(begin, stop - begin);
        if (!removeEmpty || !piece.empty())
            tokens.push_back(piece);

        // Continue right after the separator, or finish.
        begin = (stop == std::string::npos) ? std::string::npos : stop + advance;
    }

    return tokens;
}
|
122
tests.cpp
Normal file
122
tests.cpp
Normal file
@ -0,0 +1,122 @@
|
||||
#include <iostream>
|
||||
#include "csvpp.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace csvpp;
|
||||
int main()
|
||||
{
|
||||
int test = 1;
|
||||
// Test 1
|
||||
{
|
||||
RowReader tmp(",");
|
||||
stringstream ss;
|
||||
ss << "field1,field2,field3\r\n123,234,345\r\n999,000,111\r\n";
|
||||
ss >> tmp;
|
||||
rowiterator it;
|
||||
int row = 0;
|
||||
while(ss >> tmp)
|
||||
{
|
||||
switch (row)
|
||||
{
|
||||
case 0:
|
||||
if (tmp["field1"] != "123")
|
||||
{
|
||||
cerr << tmp["field1"] << endl;
|
||||
cerr << "Failed at row " << row << endl;
|
||||
cerr << "Failed at test " << test << endl;
|
||||
return 1;
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
if (tmp["field1"] != "999")
|
||||
{
|
||||
cerr << tmp["field1"] << endl;
|
||||
cerr << "Failed at row " << row << endl;
|
||||
cerr << "Failed at test " << test << endl;
|
||||
return 1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
row++;
|
||||
}
|
||||
}
|
||||
|
||||
test++;
|
||||
// Test 2
|
||||
{
|
||||
RowReader tmp;
|
||||
stringstream ss;
|
||||
ss << "field1,field2,field3\n123,234,345\n999,000,111\n";
|
||||
ss >> tmp;
|
||||
rowiterator it;
|
||||
int row = 0;
|
||||
while(ss >> tmp)
|
||||
{
|
||||
switch (row)
|
||||
{
|
||||
case 0:
|
||||
if (tmp["field1"] != "123")
|
||||
{
|
||||
cerr << tmp["field1"] << endl;
|
||||
cerr << "Failed at row " << row << endl;
|
||||
cerr << "Failed at test " << test << endl;
|
||||
return 1;
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
if (tmp["field1"] != "999")
|
||||
{
|
||||
cerr << tmp["field1"] << endl;
|
||||
cerr << "Failed at row " << row << endl;
|
||||
cerr << "Failed at test " << test << endl;
|
||||
return 1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
row++;
|
||||
}
|
||||
}
|
||||
|
||||
test++;
|
||||
// Test 3
|
||||
// Testing patch provided by Hanifa
|
||||
// https://code.google.com/p/csvpp/issues/detail?id=2
|
||||
{
|
||||
RowReader tmp("|");
|
||||
stringstream ss;
|
||||
ss << "field1|field2|field3\r\n123|234|345\r\n999|000|111\r\n";
|
||||
ss >> tmp;
|
||||
rowiterator it;
|
||||
int row = 0;
|
||||
|
||||
while(ss >> tmp)
|
||||
{
|
||||
switch (row)
|
||||
{
|
||||
case 0:
|
||||
if (tmp["field1"] != "123")
|
||||
{
|
||||
cerr << tmp["field1"] << endl;
|
||||
cerr << "Failed at row " << row << endl;
|
||||
cerr << "Failed at test " << test << endl;
|
||||
return 1;
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
if (tmp["field1"] != "999")
|
||||
{
|
||||
cerr << tmp["field1"] << endl;
|
||||
cerr << "Failed at row " << row << endl;
|
||||
cerr << "Failed at test " << test << endl;
|
||||
return 1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
row++;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
cout << "All tests ran successfully" << endl;
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue
Block a user