adding code

2021-10-22 15:30:52 -05:00 · 2021-10-22 15:30:52 -05:00 · 39cf9ae84e
commit 39cf9ae84e
parent c40ff89fbe
10 changed files with 1529 additions and 0 deletions
--- a/21
+++ b/21
@ -0,0 +1,21 @@
+# Toolchain and flags. CFLAGS is used for compile-only steps; LDFLAGS carries
+# the include path as well because the `tests` target compiles and links
+# straight from source in a single $(CC) invocation.
+CC=g++
+CFLAGS=-c -Wall -I./include
+LDFLAGS=-I./include
+# Sources of the demo program and of the test program.
+SOURCES=main.cpp ./src/csvpp.cpp ./src/stringhelper.cpp
+TEST=tests.cpp ./src/csvpp.cpp ./src/stringhelper.cpp
+OBJECTS=$(SOURCES:.cpp=.o)
+EXECUTABLE=csvpp
+
+# None of these targets produce a file of the same name.
+.PHONY: all clean tests
+
+all: $(SOURCES) $(EXECUTABLE)
+
+$(EXECUTABLE): $(OBJECTS)
+		$(CC) $(LDFLAGS) $(OBJECTS) -o $@
+
+.cpp.o:
+		$(CC) $(CFLAGS) $< -o $@
+
+# Remove every object file the build creates (main.o lives in the top-level
+# directory, the others in src/) together with both binaries.
+clean:
+		rm -f *.o src/*.o $(EXECUTABLE) test
+
+# Build the test binary from source so the target also works on a clean tree,
+# where no object files exist yet.
+tests:
+		$(CC) $(LDFLAGS) $(TEST) -o test
--- a/csv2.py
+++ b/csv2.py
@ -0,0 +1,64 @@
+
+class RowReader(dict):
+    """Dictionary holding the fields of the most recently parsed CSV line.
+
+    Lines are fed in with ``line >> reader``.  Unless ``skipheader`` is set,
+    the first line fed in is stored as the list of column names and yields no
+    fields; every later line is split on commas and stored under those names.
+    With ``skipheader`` set, fields are stored under their zero-based column
+    index converted to a string ("0", "1", ...).
+
+    Commas inside double quotes do not split a field.  The quotes themselves
+    are kept in the stored value, and a quote preceded by a backslash is
+    treated as an ordinary character.
+    """
+    # Class-level defaults; __init__ gives every instance its own values.
+    header = []
+    skipheader = False
+
+    def __init__(self,skipheader=False,newline="\n",*arg,**kw):
+        """Create a reader; extra arguments are forwarded to ``dict``."""
+        super(RowReader, self).__init__(*arg, **kw)
+        self.header = []
+        self.skipheader = skipheader
+        self.newline = newline
+
+    def _store(self, column, value):
+        """Save one field under its header name, or under its index as text."""
+        if not self.skipheader and column < len(self.header):
+            self[self.header[column]] = value
+        else:
+            # No header in use, or the row is wider than the header.
+            self[str(column)] = value
+
+    def __rrshift__(self, stream):
+        """Parse the text line ``stream`` into this dict and return ``stream``."""
+        line = stream.strip()
+        if len(self.header) == 0 and not self.skipheader:
+            self.header = line.split(",")
+            return stream
+
+        # Start from an empty row so a short line cannot expose values that
+        # were stored while parsing an earlier, longer line.
+        self.clear()
+
+        field = ""
+        column = 0
+        inquote = False
+        end = len(line)
+        i = 0
+        # The position one past the last character acts as a final separator
+        # so the last field is stored by the same code as all the others.
+        while i <= end:
+            if i == end or (line[i] == "," and not inquote):
+                self._store(column, field)
+                field = ""
+                column += 1
+                i += 1
+                continue
+
+            c = line[i]
+            if c != "\"":
+                field += c
+            elif i > 0 and line[i-1] == "\\":
+                # Escaped quote: keep it, do not toggle the quote state.
+                field += c
+            elif not inquote:
+                inquote = True
+                field += c
+            else:
+                # Closing quote: drop whitespace in front of the opening
+                # quote, then ignore anything up to the next separator.
+                inquote = False
+                field = (field + c).lstrip()
+                nxt = line.find(",", i)
+                i = end if nxt == -1 else nxt
+                continue
+            i += 1
+        return stream
+
+class RowWriter(list):
+    """List of rows; currently adds nothing on top of the built-in ``list``."""
+    def __init__(self,*arg,**kw):
+        # Indented with spaces only: the original tab-indented trailing
+        # statement mixed tabs and spaces inside one block.
+        super(RowWriter, self).__init__(*arg, **kw)
+
+# Demo driver: feed every line of sampledata.csv through a RowReader and print
+# each parsed row.  The header line leaves the reader empty, so it is skipped.
+r = RowReader()
+with open("sampledata.csv") as f:
+    # The with-block closes the file even if parsing a line raises.
+    for l in f:
+        l >> r
+        if r != {}:
+            print(str(r))
--- a/csvtest.py
+++ b/csvtest.py
@ -0,0 +1,23 @@
+import csv
+
+# Reference output: print every row of sampledata.csv as parsed by the
+# standard library's csv.DictReader.
+rownum = 0
+header = []
+
+# The with-block closes the file even if reading a row raises.
+with open("sampledata.csv") as ifile:
+    reader = csv.DictReader(ifile)
+    for r in reader:
+        print(r)
+#for row in reader:
+#	if rownum == 0:
+#		header = row
+#	else:
+#		colnum = 0
+#		for col in row:
+#			print "%s => %s" % (header[colnum], col)
+#			colnum += 1
+#
+#	print "\n"
+#	rownum += 1
--- a/include/csvpp.h
+++ b/include/csvpp.h
@ -0,0 +1,42 @@
+#ifndef CSVPP_H
+#define CSVPP_H
+
+#include <string>
+#include <vector>
+#include <map>
+#include <sstream>
+#include <istream>
+
+
+#define VERSION "2.2"
+
+namespace csvpp {
+
+	class RowWriter;
+
+	/*
+		One CSV row, stored as a map from column key to field text.
+
+		Rows are filled by operator>>, which reads one line per call. The
+		stream operators are friends so they can reach the header list,
+		the skipheader flag and the delimiter.
+	*/
+	class RowReader : public std::map<std::string, std::string> {
+		private:
+			std::vector<std::string> header;
+			bool skipheader;
+			std::string delimiter_char; // this is a string because the split function helper is expecting a string, but really this is just a char
+		public:
+			const char * newline;
+			// Adding support for custom delimiter character
+			// Based on the patch by Hanifa
+			// https://code.google.com/p/csvpp/issues/detail?id=2
+			//
+			// The initializers are listed in member declaration order, which is
+			// the order they run in regardless of how they are written.
+			RowReader(std::string delimiter_char = ",", bool skipheader=false,const char * newline="\n") : skipheader(skipheader), delimiter_char(delimiter_char), newline(newline) { }
+			// Forgets the stored header only. This hides std::map::clear(), so
+			// the map entries themselves are left untouched.
+			void clear() { header.clear(); }
+			friend std::istream & operator>>(std::istream & os, RowReader & r);
+			friend std::ostream & operator<<(std::ostream & os, const RowWriter & r);
+	};
+
+	/*
+		An ordered collection of rows that can be written to an output
+		stream with operator<<.
+	*/
+	class RowWriter : public std::vector<RowReader>
+	{
+		public:
+			friend std::ostream & operator<<(std::ostream & os, const RowWriter & r);
+	};
+	
+	// Read-only iterator over the key/value pairs of one row.
+	typedef RowReader::const_iterator rowiterator;
+}
+
+#endif
--- a/include/stringhelper.h
+++ b/include/stringhelper.h
@ -0,0 +1,46 @@
+#ifndef STRINGHELPER_H
+#define STRINGHELPER_H
+#include <string>
+#include <vector>
+#include <iterator>
+#include <sstream>
+//Borrowed from http://www.cplusplus.com/faq/sequences/strings/trim/
+inline std::string trim_right_copy(
+  const std::string& s,
+  const std::string& delimiters = " \f\n\r\t\v" )
+{
+  // Returns a copy of s without the trailing run of characters found in
+  // delimiters. When every character is a delimiter, find_last_not_of()
+  // yields npos and the + 1 wraps to 0, which gives an empty result.
+  const std::string::size_type keep = s.find_last_not_of( delimiters ) + 1;
+  return std::string( s, 0, keep );
+}
+
+inline std::string trim_left_copy(
+  const std::string& s,
+  const std::string& delimiters = " \f\n\r\t\v" )
+{
+  // Returns a copy of s without the leading run of characters found in
+  // delimiters.
+  const std::string::size_type first = s.find_first_not_of( delimiters );
+  // An empty or all-delimiter string has nothing to keep. Passing npos to
+  // substr() would throw std::out_of_range, so return early instead.
+  if ( first == std::string::npos )
+    return std::string();
+  return s.substr( first );
+}
+
+inline std::string trim_copy(
+  const std::string& s,
+  const std::string& delimiters = " \f\n\r\t\v" )
+{
+  // Trim the right-hand side first, then hand that result to the left trim.
+  const std::string right_trimmed = trim_right_copy( s, delimiters );
+  return trim_left_copy( right_trimmed, delimiters );
+}
+
+// Pointer to a const std::string member function that takes a string to
+// search for and a start offset and returns a position. split() stores either
+// std::string::find or std::string::find_first_of in a variable of this type.
+typedef std::string::size_type (std::string::*find_t)(const std::string& delim,
+                                                std::string::size_type offset) const;
+
+// Splits s into tokens and returns them in order of appearance
+// (defined in src/stringhelper.cpp).
+//   s           - the text to split
+//   match       - the separator; by default every character in it is a
+//                 separator on its own
+//   removeEmpty - when true, empty tokens are left out of the result
+//   fullMatch   - when true, match is treated as one multi-character
+//                 separator; an empty match then yields s as the only token
+std::vector<std::string> split(const std::string& s,
+                         const std::string& match,
+                         bool removeEmpty=false,
+                         bool fullMatch=false);
+
+//http://stackoverflow.com/a/13636164/195722
+template <typename T>
+  std::string ObjToString ( T obj )
+  {
+     // Render obj with its stream insertion operator and hand back the text.
+     std::ostringstream rendered;
+     rendered << obj;
+     std::string text = rendered.str();
+     return text;
+  }
+
+#endif
--- a/main.cpp
+++ b/main.cpp
@ -0,0 +1,23 @@
+#include <iostream>
+#include <sstream>
+#include "csvpp.h"
+
+using namespace std;
+using namespace csvpp;
+
+int main()
+{
+        // Demo input: a header line followed by two data rows, CRLF-terminated.
+        stringstream input("field1,field2,field3\r\n123,234,345\r\n999,000,111\r\n");
+        RowReader row;
+
+        // The first extraction consumes the header line and stores no fields.
+        input >> row;
+
+        // Print every "column => value" pair of each data row, with a blank
+        // line after each row.
+        while (input >> row)
+        {
+                rowiterator cell = row.begin();
+                while (cell != row.end())
+                {
+                        cout << cell->first << " => " << cell->second << endl;
+                        ++cell;
+                }
+                cout << endl;
+        }
+        return 0;
+
+}
--- a/sampledata.csv
+++ b/sampledata.csv
--- a/src/csvpp.cpp
+++ b/src/csvpp.cpp
@ -0,0 +1,139 @@
+#include <string>
+#include <iostream>
+#include <istream>
+#include <fstream>
+#include <sstream>
+#include <algorithm>
+#include "csvpp.h"
+#include "stringhelper.h"
+
+using namespace std;
+using namespace csvpp;
+
+namespace csvpp {
+
+	/*
+		Writes every row of r to os as delimited text and returns os.
+
+		When the first row has a stored header and does not skip headers,
+		the header line is written first and every row's values follow in
+		header order; a column missing from a row is written as an empty
+		field and keys that are not in the header are not written.
+		Otherwise no header line is written and each row's values are
+		written in the map's own key order.
+
+		Each row is terminated with that row's newline string. Values are
+		written as stored; nothing is quoted or escaped here.
+	*/
+	std::ostream & operator<<(std::ostream & os, const RowWriter & r)
+	{
+		if (r.empty())
+			return os;
+
+		const RowReader & first = r[0];
+		// Column order is only known when a header was captured. Checking
+		// for an empty header also avoids computing size() - 1 on zero.
+		const bool useheader = !first.skipheader && !first.header.empty();
+		const std::vector<std::string>::size_type columns = first.header.size();
+
+		if (useheader)
+		{
+			for (std::vector<std::string>::size_type col = 0; col < columns; col++)
+			{
+				if (col != 0)
+					os << first.delimiter_char;
+				os << first.header[col];
+			}
+			os << first.newline;
+		}
+
+		for (RowWriter::size_type i = 0; i < r.size(); i++)
+		{
+			const RowReader & row = r[i];
+			if (useheader)
+			{
+				// Look values up by column name so they line up with the
+				// header line instead of following the map's sort order.
+				for (std::vector<std::string>::size_type col = 0; col < columns; col++)
+				{
+					if (col != 0)
+						os << row.delimiter_char;
+					rowiterator cell = row.find(first.header[col]);
+					if (cell != row.end())
+						os << cell->second;
+				}
+			}
+			else
+			{
+				// NOTE(review): map order is lexicographic, so index keys
+				// sort as "0", "1", "10", "2", ... once a row has more than
+				// ten columns - confirm whether numeric order is wanted.
+				for (rowiterator cell = row.begin(); cell != row.end(); ++cell)
+				{
+					if (cell != row.begin())
+						os << row.delimiter_char;
+					os << cell->second;
+				}
+			}
+			// Terminate every row, including the first one.
+			os << row.newline;
+		}
+
+		return os;
+	}
+
+	/*
+		Reads one line from is into r and returns is.
+
+		If r has no header yet and does not skip headers, the line is split
+		on the delimiter and stored as the header; no fields are stored.
+		Otherwise the previous row's fields are discarded and the line is
+		parsed into fields, each stored under its header name, or under its
+		zero-based column index as text when headers are skipped or the row
+		has more fields than the header.
+
+		A delimiter inside double quotes does not end a field. The quotes
+		stay part of the stored value, whitespace in front of the opening
+		quote is dropped, and text between the closing quote and the next
+		delimiter is ignored. A quote preceded by a backslash is kept as an
+		ordinary character.
+
+		When no line can be read the stream is left in its failed state and
+		r is not modified.
+	*/
+	std::istream & operator>>(std::istream & is, RowReader & r) 
+	{ 
+		string buffer;
+		// Only a failed read ends the input. A last line without a trailing
+		// newline sets eofbit but still delivers data, so it is parsed.
+		if (!getline(is, buffer))
+		{
+			return is;
+		}
+
+		// A whitespace-only line is reduced to the empty string up front,
+		// so trim_copy() is only ever given text that has content.
+		if (buffer.find_first_not_of(" \f\n\r\t\v") == string::npos)
+			buffer.clear();
+		else
+			buffer = trim_copy(buffer);
+
+		if (r.header.size() == 0 && !r.skipheader)
+		{
+			vector<string> sections = split(buffer, r.delimiter_char);
+			for (vector<string>::size_type i = 0; i < sections.size(); i++)
+				r.header.push_back(sections[i]);
+			return is;
+		}
+
+		// RowReader::clear() only resets the header, so the map's own
+		// clear() is called to drop the fields of the previous row.
+		static_cast<std::map<std::string, std::string> &>(r).clear();
+
+		const char delimiter = r.delimiter_char[0];
+		const string::size_type length = buffer.length();
+		string field;
+		string::size_type currentheader = 0;
+		bool startquote = false;
+
+		// The position one past the last character acts as a final
+		// delimiter, so the last field is stored by the same code path.
+		for (string::size_type i = 0; i <= length; i++)
+		{
+			if (i == length || (buffer[i] == delimiter && !startquote))
+			{
+				if (!r.skipheader && currentheader < r.header.size())
+				{
+					r[r.header[currentheader]] = field;
+				}
+				else
+				{
+					// No header in use, or more fields than header names.
+					r[ObjToString(currentheader)] = field;
+				}
+				field.clear();
+				currentheader++;
+				continue;
+			}
+
+			const char c = buffer[i];
+			if (c != '"')
+			{
+				field += c;
+				continue;
+			}
+
+			// An escaped quote is kept and does not change the quote state.
+			if (i > 0 && buffer[i - 1] == '\\')
+			{
+				field += c;
+				continue;
+			}
+
+			if (!startquote)
+			{
+				startquote = true;
+				field += c;
+				continue;
+			}
+
+			// Closing quote: drop whitespace in front of the opening quote,
+			// then skip ahead so the next iteration lands on the delimiter
+			// (or on the end of the line when there is none).
+			startquote = false;
+			field += c;
+			field = trim_left_copy(field);
+			const string::size_type next = buffer.find(delimiter, i);
+			i = (next == string::npos ? length : next) - 1;
+		}
+		
+		return is; 
+	}
+}
--- a/src/stringhelper.cpp
+++ b/src/stringhelper.cpp
@ -0,0 +1,49 @@
+#include <string>
+#include <vector>
+#include "stringhelper.h"
+
+using namespace std;
+
+std::vector<std::string> split(const std::string& s,
+                         const std::string& match,
+                         bool removeEmpty,
+                         bool fullMatch)
+{
+        // Splits s at every separator and returns the pieces in order.
+        // With fullMatch the whole of match is one separator; without it,
+        // each character of match separates on its own. Empty pieces are
+        // dropped when removeEmpty is set.
+        std::vector<std::string> tokens;
+
+        // How far to step past a separator once it has been found.
+        const std::string::size_type step = fullMatch ? match.length() : 1;
+        std::string::size_type cursor = 0;
+
+        for (;;)
+        {
+            std::string::size_type hit;
+            if (step == 0)
+            {
+                // An empty full-match separator would match everywhere and
+                // never advance; treat it as "no separator present".
+                hit = std::string::npos;
+            }
+            else if (fullMatch)
+            {
+                hit = s.find(match, cursor);
+            }
+            else
+            {
+                hit = s.find_first_of(match, cursor);
+            }
+
+            // Piece covering [cursor..hit); runs to the end when hit is npos.
+            const std::string piece = s.substr(cursor, hit - cursor);
+            if (!piece.empty() || !removeEmpty)
+            {
+                tokens.push_back(piece);
+            }
+
+            if (hit == std::string::npos)
+            {
+                break;
+            }
+            cursor = hit + step;
+        }
+
+        return tokens;
+}
--- a/tests.cpp
+++ b/tests.cpp
@ -0,0 +1,122 @@
+#include <iostream>
+#include "csvpp.h"
+
+using namespace std;
+using namespace csvpp;
+// Feeds input through reader and checks the two data rows that every test
+// case in this file uses: "field1" must be "123" on the first row and "999"
+// on the second, and exactly two data rows must be read.
+// Returns 0 on success; prints a diagnostic to cerr and returns 1 otherwise.
+static int check_rows(RowReader & reader, const char * input, int test)
+{
+	stringstream ss;
+	ss << input;
+	// The first extraction consumes the header line.
+	ss >> reader;
+
+	int row = 0;
+	while (ss >> reader)
+	{
+		const char * expected = 0;
+		switch (row)
+		{
+			case 0:
+				expected = "123";
+				break;
+			case 1:
+				expected = "999";
+				break;
+		}
+		if (expected != 0 && reader["field1"] != expected)
+		{
+			cerr << reader["field1"] << endl;
+			cerr << "Failed at row " << row << endl;
+			cerr << "Failed at test " << test << endl;
+			return 1;
+		}
+		row++;
+	}
+
+	// Without this check a reader that yields no rows would pass.
+	if (row != 2)
+	{
+		cerr << "Expected 2 data rows but read " << row << endl;
+		cerr << "Failed at test " << test << endl;
+		return 1;
+	}
+	return 0;
+}
+
+int main()
+{
+	int test = 1;
+	// Test 1: explicit comma delimiter, CRLF line endings
+	{
+		RowReader tmp(",");
+		if (check_rows(tmp, "field1,field2,field3\r\n123,234,345\r\n999,000,111\r\n", test) != 0)
+			return 1;
+	}
+
+	test++;
+	// Test 2: default-constructed reader, LF line endings
+	{
+		RowReader tmp;
+		if (check_rows(tmp, "field1,field2,field3\n123,234,345\n999,000,111\n", test) != 0)
+			return 1;
+	}
+
+	test++;
+	// Test 3
+	// Testing patch provided by Hanifa
+	// https://code.google.com/p/csvpp/issues/detail?id=2
+	{
+		RowReader tmp("|");
+		if (check_rows(tmp, "field1|field2|field3\r\n123|234|345\r\n999|000|111\r\n", test) != 0)
+			return 1;
+	}
+
+	cout << "All tests ran successfully" << endl;
+	return 0;
+}