adding code

master
Natalie Adams 2021-10-22 15:30:52 -05:00
parent c40ff89fbe
commit 39cf9ae84e
10 changed files with 1529 additions and 0 deletions

21
Makefile 100644
View File

@ -0,0 +1,21 @@
# Build script for the csvpp demo executable and its test binary.
#   make / make all  -> builds ./csvpp from main.cpp and the library sources
#   make tests       -> builds ./test from tests.cpp and the library objects
#   make clean       -> removes every object file and binary produced above
CC=g++
CFLAGS=-c -Wall -I./include
LDFLAGS=-I./include
SOURCES=main.cpp ./src/csvpp.cpp ./src/stringhelper.cpp
TEST=tests.cpp ./src/csvpp.cpp ./src/stringhelper.cpp
OBJECTS=$(SOURCES:.cpp=.o)
EXECUTABLE=csvpp

# None of these names is a real output file; always run their recipes.
.PHONY: all clean tests

all: $(SOURCES) $(EXECUTABLE)

$(EXECUTABLE): $(OBJECTS)
	$(CC) $(LDFLAGS) $(OBJECTS) -o $@

.cpp.o:
	$(CC) $(CFLAGS) $< -o $@

# main.o is created next to main.cpp (outside src/), and `tests` produces
# ./test, so both have to be listed explicitly.
clean:
	rm -f src/*.o main.o csvpp test

# Depend on the library objects so `make tests` works from a clean tree.
tests: ./src/csvpp.o ./src/stringhelper.o
	$(CC) $(LDFLAGS) ./src/csvpp.o ./src/stringhelper.o tests.cpp -o test

64
csv2.py 100644
View File

@ -0,0 +1,64 @@
class RowReader(dict):
    """Dictionary that fills itself from one CSV line at a time.

    Feed it text with ``line >> reader``. Unless ``skipheader`` is set, the
    first line fed becomes the header and is not stored as data; every later
    line is split on commas and stored under the matching header name. With
    ``skipheader`` set, or for columns beyond the end of the header, the key
    is the zero-based column index as a string.

    Quoting rules:
      * commas between double quotes belong to the field;
      * the surrounding quotes are kept in the stored value;
      * whitespace before the opening quote is dropped;
      * text between a closing quote and the next comma is ignored;
      * a quote preceded by a backslash is copied literally.

    Values from an earlier line are not cleared before a new line is parsed.
    """

    # Class-level defaults kept for backward compatibility; __init__ gives
    # every instance its own values.
    header = []
    skipheader = False

    def __init__(self, skipheader=False, newline="\n", *arg, **kw):
        """Create an empty reader.

        skipheader -- when true, no header line is expected and columns are
                      keyed by their index.
        newline    -- stored on the instance; not used by the parser.
        Remaining arguments are forwarded to ``dict``.
        """
        super(RowReader, self).__init__(*arg, **kw)
        self.header = []
        self.skipheader = skipheader
        self.newline = newline

    def _column_key(self, index):
        """Return the dict key for the column at position ``index``."""
        if not self.skipheader and index < len(self.header):
            return self.header[index]
        return str(index)

    def __rrshift__(self, stream):
        """Parse one line (``stream``, a string) and return it unchanged."""
        line = stream.strip()
        if len(self.header) == 0 and not self.skipheader:
            self.header = line.split(",")
            return stream

        field = ""
        column = 0
        in_quotes = False
        i = 0
        # A while loop is required: after a closing quote the scan position
        # jumps forward, which a ``for ... in range`` loop cannot do.
        while i < len(line):
            c = line[i]
            if c == ",":
                if in_quotes:
                    field += c
                else:
                    self[self._column_key(column)] = field
                    field = ""
                    column += 1
            elif c == "\"":
                if i > 0 and line[i - 1] == "\\":
                    # Escaped quote: plain data.
                    field += c
                elif in_quotes:
                    # Closing quote: keep it, drop whitespace that preceded
                    # the opening quote, then resume at the next comma.
                    field = (field + c).lstrip()
                    in_quotes = False
                    next_comma = line.find(",", i)
                    if next_comma != -1:
                        i = next_comma
                        continue
                else:
                    in_quotes = True
                    field += c
            else:
                field += c
            i += 1
        self[self._column_key(column)] = field
        return stream
class RowWriter(list):
    """A plain list subclass intended to hold rows; adds no behaviour of its own."""

    def __init__(self, *arg, **kw):
        # Delegate straight to list's own initialiser.
        list.__init__(self, *arg, **kw)
# Demo driver: feed every line of sampledata.csv to a RowReader and print the
# row after each line that leaves it non-empty. The with-block closes the
# file even if parsing raises.
with open("sampledata.csv") as f:
    r = RowReader()
    for l in f:
        l >> r
        if r != {}:
            # Call form of print works on both Python 2 and Python 3.
            print(str(r))

23
csvtest.py 100644
View File

@ -0,0 +1,23 @@
import csv

# Reference run: parse sampledata.csv with the standard library's DictReader
# and print each row. The with-block closes the file even if reading raises.
with open("sampledata.csv") as ifile:
    reader = csv.DictReader(ifile)
    for r in reader:
        # Call form of print works on both Python 2 and Python 3.
        print(r)

# Earlier variant that used a plain reader and paired columns with the header
# by hand; kept for reference only.
#rownum = 0
#header = []
#for row in reader:
#    if rownum == 0:
#        header = row
#    else:
#        colnum = 0
#        for col in row:
#            print "%s => %s" % (header[colnum], col)
#            colnum += 1
#
#        print "\n"
#    rownum += 1

42
include/csvpp.h 100644
View File

@ -0,0 +1,42 @@
#ifndef CSVPP_H
#define CSVPP_H
#include <string>
#include <vector>
#include <map>
#include <sstream>
#include <istream>
#define VERSION "2.2"
namespace csvpp {
    class RowWriter;

    // One CSV record, stored as a map from column key to cell text.
    // Records are filled by operator>> (one input line per call), which is a
    // friend so that it can reach the header and parsing options below.
    class RowReader : public std::map<std::string, std::string> {
        private:
            std::vector<std::string> header; // column names taken from the header line
            bool skipheader;                 // true: the input has no header line
            std::string delimiter_char; // this is a string because the split function helper is expecting a string, but really this is just a char
        public:
            const char * newline; // line terminator used when rows are written out

            // Adding support for custom delimiter character
            // Based on the patch by Hanifa
            // https://code.google.com/p/csvpp/issues/detail?id=2
            //
            // The initializer list follows the member declaration order so
            // that -Wall builds do not emit a -Wreorder warning.
            RowReader(std::string delimiter_char = ",", bool skipheader=false,const char * newline="\n")
                : header(),
                  skipheader(skipheader),
                  delimiter_char(delimiter_char),
                  newline(newline) { }

            // Forgets the stored header only. This hides std::map::clear(),
            // so cells already stored in the map are left untouched.
            // NOTE(review): confirm that keeping the cells is intended.
            void clear() { header.clear(); }

            friend std::istream & operator>>(std::istream & is, RowReader & r);
            friend std::ostream & operator<<(std::ostream & os, const RowWriter & r);
    };

    // A sequence of records that can be streamed out with operator<<.
    class RowWriter : public std::vector<RowReader>
    {
        public:
            friend std::ostream & operator<<(std::ostream & os, const RowWriter & r);
    };

    // Read-only iterator over the (key, value) cells of a RowReader.
    typedef RowReader::const_iterator rowiterator;
}
#endif

View File

@ -0,0 +1,46 @@
#ifndef STRINGHELPER_H
#define STRINGHELPER_H
#include <string>
#include <vector>
#include <iterator>
#include <sstream>
//Borrowed from http://www.cplusplus.com/faq/sequences/strings/trim/
// Returns a copy of `s` with every trailing character that appears in
// `delimiters` removed. Yields an empty string when `s` contains nothing else.
inline std::string trim_right_copy(
const std::string& s,
const std::string& delimiters = " \f\n\r\t\v" )
{
    // npos + 1 wraps to 0, so "no character to keep" gives a length of zero.
    const std::string::size_type keep_length = s.find_last_not_of( delimiters ) + 1;
    return std::string( s, 0, keep_length );
}
// Returns a copy of `s` with every leading character that appears in
// `delimiters` removed. Yields an empty string when `s` is empty or consists
// solely of delimiter characters.
inline std::string trim_left_copy(
const std::string& s,
const std::string& delimiters = " \f\n\r\t\v" )
{
    const std::string::size_type first_kept = s.find_first_not_of( delimiters );
    // Passing npos to substr() throws std::out_of_range, so the
    // "nothing to keep" case has to be answered explicitly.
    if ( first_kept == std::string::npos )
        return std::string();
    return s.substr( first_kept );
}
// Trims `s` on both sides: trailing delimiter characters are removed first,
// then the result is passed through trim_left_copy with the same delimiters.
inline std::string trim_copy(
const std::string& s,
const std::string& delimiters = " \f\n\r\t\v" )
{
    const std::string without_tail = trim_right_copy( s, delimiters );
    return trim_left_copy( without_tail, delimiters );
}
// Pointer to a const std::string member function that takes a search string
// and a starting offset and returns a position (the signature shared by
// std::string::find and std::string::find_first_of).
typedef std::string::size_type (std::string::*find_t)(const std::string& delim,
std::string::size_type offset) const;
// Splits `s` into tokens separated by `match` and returns them in order.
//   removeEmpty - when true, empty tokens are left out of the result.
//   fullMatch   - when true, `match` is searched for as one whole separator;
//                 otherwise any single character of `match` separates tokens.
// Only declared here; the definition lives in a separate source file.
std::vector<std::string> split(const std::string& s,
const std::string& match,
bool removeEmpty=false,
bool fullMatch=false);
//http://stackoverflow.com/a/13636164/195722
// Converts `obj` to text by streaming it into a string stream, so T must
// support operator<< on std::ostream.
template <typename T>
std::string ObjToString ( T obj )
{
    std::ostringstream formatter;
    formatter << obj;
    const std::string text = formatter.str();
    return text;
}
#endif

23
main.cpp 100644
View File

@ -0,0 +1,23 @@
#include <iostream>
#include <sstream>
#include "csvpp.h"
using namespace std;
using namespace csvpp;
int main()
{
RowReader tmp;
stringstream ss;
ss << "field1,field2,field3\r\n123,234,345\r\n999,000,111\r\n";
ss >> tmp;
rowiterator it;
while(ss >> tmp)
{
for(it = tmp.begin(); it != tmp.end(); it++)
cout << it->first << " => " << it->second << endl;
cout << endl;
}
return 0;
}

1000
sampledata.csv 100644

File diff suppressed because it is too large Load Diff

139
src/csvpp.cpp 100644
View File

@ -0,0 +1,139 @@
#include <string>
#include <iostream>
#include <istream>
#include <fstream>
#include <sstream>
#include <algorithm>
#include "csvpp.h"
#include "stringhelper.h"
using namespace std;
using namespace csvpp;
namespace csvpp {
std::ostream & operator<<(std::ostream & os, const RowWriter & r)
{
rowiterator it;
if (r.size() == 0)
return os;
if (!r[0].skipheader)
{
for(unsigned int i = 0; i < r[0].header.size() - 1; i++)
{
os << r[0].header[i] << r[0].delimiter_char;;
}
os << r[0].header[r[0].header.size() - 1] << r[0].newline;
}
for(unsigned int i = 0; i < r.size(); i++)
{
for(it = r[i].begin(); it != r[i].end(); it++)
{
if (distance(r[i].begin(), it) != (int)(r[i].size() - 1))
os << it->second << r[i].delimiter_char;
else
os << it->second;
}
if (!r[0].skipheader && i != 0)
os << r[i].newline;
}
return os;
}
std::istream & operator>>(std::istream & is, RowReader & r)
{
string buffer;
stringstream buffer2;
int currentheader = 0;
getline(is, buffer);
// Patch by damienlmoore - https://code.google.com/p/csvpp/issues/detail?id=1
if(!is.good() || is.eof())
{
return is;
}
buffer = trim_copy(buffer);
char c;
bool startquote = false;
if(r.header.size() == 0 && !r.skipheader)
{
vector<string> sections = split(buffer, r.delimiter_char);
for(unsigned int i = 0; i < sections.size(); i++)
r.header.push_back(sections[i]);
} else {
for(unsigned int i = 0; i < buffer.length(); i++)
{
c = buffer[i];
/*
If the current character is a comma then we may have found the start of the next column
however we do need to test if we are inside of a quote
If we aren't inside of a quote - store the value using the current header 'pointer' and keep scanning
*/
if (c == r.delimiter_char[0])
{
if (startquote)
{
buffer2 << c;
continue;
}
if (!r.skipheader)
{
r[r.header[currentheader]] = buffer2.str();
}
else
{
r[ObjToString(currentheader)] = buffer2.str();
}
buffer2.str(string());
currentheader++;
continue;
}
// If the character is a quote then we need to note this and use that to ignore commas
// added logic to ignore whitespace before and after the whitespace
if (c == '"')
{
if (startquote)
{
buffer2 << '"';
buffer2.str(trim_left_copy(buffer2.str()));
}
if ( (((int)i-1) >= 0 && buffer[i-1] == '\\'))
{
buffer2 << c;
continue;
}
startquote = !startquote;
//find , and move i to it
if (!startquote)
{
for(unsigned int x = i; x < buffer.length(); x++)
{
if (buffer[x] == r.delimiter_char[0] || x == buffer.length())
{
i = x-1;
break;
}
}
}
}
buffer2 << c;
}
if (!r.skipheader)
{
r[r.header[currentheader]] = buffer2.str();
} else {
r[ObjToString(currentheader)] = buffer2.str();
}
}
return is;
}
}

View File

@ -0,0 +1,49 @@
#include <string>
#include <vector>
#include "stringhelper.h"
using namespace std;
// Cuts `s` into the pieces that lie between separators and returns them in
// order of appearance.
//   match       - the separator(s) to look for
//   removeEmpty - true: empty pieces are not added to the result
//   fullMatch   - true: `match` is one whole separator string;
//                 false: every character of `match` is a separator on its own
vector<string> split(const string& s,
const string& match,
bool removeEmpty,
bool fullMatch)
{
    // Pick the search routine and how far to jump past a separator.
    find_t locate = &string::find_first_of;
    string::size_type advance = 1;
    if (fullMatch)
    {
        locate = &string::find;
        advance = match.length();
    }

    vector<string> pieces;
    string::size_type cursor = 0;
    do
    {
        // An empty whole-string separator matches everywhere and would never
        // advance; treat it as "no separator found" so the rest of the input
        // is taken as a single piece.
        string::size_type hit = string::npos;
        if (advance != 0)
            hit = (s.*locate)(match, cursor);

        const string piece = s.substr(cursor, hit - cursor);
        if (!piece.empty() || !removeEmpty)
            pieces.push_back(piece);

        // Continue just past the separator, or stop when none was found.
        cursor = (hit == string::npos) ? string::npos : hit + advance;
    } while (cursor != string::npos);

    return pieces;
}

122
tests.cpp 100644
View File

@ -0,0 +1,122 @@
#include <iostream>
#include "csvpp.h"
using namespace std;
using namespace csvpp;
int main()
{
int test = 1;
// Test 1
{
RowReader tmp(",");
stringstream ss;
ss << "field1,field2,field3\r\n123,234,345\r\n999,000,111\r\n";
ss >> tmp;
rowiterator it;
int row = 0;
while(ss >> tmp)
{
switch (row)
{
case 0:
if (tmp["field1"] != "123")
{
cerr << tmp["field1"] << endl;
cerr << "Failed at row " << row << endl;
cerr << "Failed at test " << test << endl;
return 1;
}
break;
case 1:
if (tmp["field1"] != "999")
{
cerr << tmp["field1"] << endl;
cerr << "Failed at row " << row << endl;
cerr << "Failed at test " << test << endl;
return 1;
}
break;
}
row++;
}
}
test++;
// Test 2
{
RowReader tmp;
stringstream ss;
ss << "field1,field2,field3\n123,234,345\n999,000,111\n";
ss >> tmp;
rowiterator it;
int row = 0;
while(ss >> tmp)
{
switch (row)
{
case 0:
if (tmp["field1"] != "123")
{
cerr << tmp["field1"] << endl;
cerr << "Failed at row " << row << endl;
cerr << "Failed at test " << test << endl;
return 1;
}
break;
case 1:
if (tmp["field1"] != "999")
{
cerr << tmp["field1"] << endl;
cerr << "Failed at row " << row << endl;
cerr << "Failed at test " << test << endl;
return 1;
}
break;
}
row++;
}
}
test++;
// Test 3
// Testing patch provided by Hanifa
// https://code.google.com/p/csvpp/issues/detail?id=2
{
RowReader tmp("|");
stringstream ss;
ss << "field1|field2|field3\r\n123|234|345\r\n999|000|111\r\n";
ss >> tmp;
rowiterator it;
int row = 0;
while(ss >> tmp)
{
switch (row)
{
case 0:
if (tmp["field1"] != "123")
{
cerr << tmp["field1"] << endl;
cerr << "Failed at row " << row << endl;
cerr << "Failed at test " << test << endl;
return 1;
}
break;
case 1:
if (tmp["field1"] != "999")
{
cerr << tmp["field1"] << endl;
cerr << "Failed at row " << row << endl;
cerr << "Failed at test " << test << endl;
return 1;
}
break;
}
row++;
}
}
cout << "All tests ran successfully" << endl;
return 0;
}