AMO-Tools-Suite  v.0.9.0
Set of tools for calculating energy efficiency in industrial equipment
All Classes Namespaces Files Functions Variables Enumerations Friends Macros Pages
csv.h
1 // Copyright: (2012-2015) Ben Strasser <code@ben-strasser.net>
2 // License: BSD-3
3 //
4 // All rights reserved.
5 //
6 // Redistribution and use in source and binary forms, with or without
7 // modification, are permitted provided that the following conditions are met:
8 //
9 // 1. Redistributions of source code must retain the above copyright notice,
10 // this list of conditions and the following disclaimer.
11 //
12 //2. Redistributions in binary form must reproduce the above copyright notice,
13 // this list of conditions and the following disclaimer in the documentation
14 // and/or other materials provided with the distribution.
15 //
16 //3. Neither the name of the copyright holder nor the names of its contributors
17 // may be used to endorse or promote products derived from this software
18 // without specific prior written permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
24 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 // POSSIBILITY OF SUCH DAMAGE.
31 
32 #ifndef CSV_H
33 #define CSV_H
34 
#include <vector>
#include <string>
#include <cstring>
#include <algorithm>
#include <utility>
#include <cstdio>
#include <exception>
#ifndef CSV_IO_NO_THREAD
#include <mutex>
#include <thread>
#include <condition_variable>
#endif
#include <memory>
#include <cassert>
#include <cerrno>
#include <istream>
#include <limits>
51 
52 namespace io{
54  // LineReader //
56 
57  namespace error{
// Common root of the library's exception types.
//
// A derived error implements format_error_message() to render its text into
// error_message_buffer. what() re-renders the text on every call and returns
// a pointer to that buffer.
struct base : std::exception{
    // Renders the description of the error into error_message_buffer.
    virtual void format_error_message() const = 0;

    // Formats the message, then hands out the internal buffer.
    const char* what() const noexcept override{
        this->format_error_message();
        return this->error_message_buffer;
    }

    // Storage for the rendered text; mutable because what() is const.
    mutable char error_message_buffer[512];
};
68 
// Longest file name (without the terminating NUL) an error object can store.
const int max_file_name_length = 255;

// Mixin that gives an error a fixed-size, always NUL-terminated copy of a
// file name. Longer names are truncated to max_file_name_length characters.
struct with_file_name{
    // Starts out with an empty, fully zeroed name.
    with_file_name(){
        std::memset(file_name, 0, sizeof(file_name));
    }

    // Stores a (possibly truncated) copy of file_name; nullptr clears the name.
    void set_file_name(const char*file_name){
        if(file_name != nullptr){
            // Copy at most size-1 bytes and terminate explicitly, because
            // std::strncpy does not terminate when the source is too long.
            std::strncpy(this->file_name, file_name, sizeof(this->file_name)-1);
            this->file_name[sizeof(this->file_name)-1] = '\0';
        }else{
            this->file_name[0] = '\0';
        }
    }

    char file_name[max_file_name_length+1];
};
87 
// Mixin that lets an error carry a line number. The value is -1 until
// set_file_line() is called.
struct with_file_line{
    with_file_line() : file_line(-1) {}

    // Records the line number that the error message should mention.
    void set_file_line(int file_line){
        this->file_line = file_line;
    }

    int file_line;
};
99 
// Mixin that lets an error carry an errno value. The value is 0 until
// set_errno() is called.
struct with_errno{
    with_errno() : errno_value(0) {}

    // Records the errno value that the error message should explain.
    void set_errno(int errno_value){
        this->errno_value = errno_value;
    }

    int errno_value;
};
111 
112  struct can_not_open_file :
113  base,
114  with_file_name,
115  with_errno{
116  void format_error_message()const override{
117  if(errno_value != 0)
118  std::snprintf(error_message_buffer, sizeof(error_message_buffer),
119  "Can not open file \"%s\" because \"%s\"."
120  , file_name, std::strerror(errno_value));
121  else
122  std::snprintf(error_message_buffer, sizeof(error_message_buffer),
123  "Can not open file \"%s\"."
124  , file_name);
125  }
126  };
127 
128  struct line_length_limit_exceeded :
129  base,
130  with_file_name,
131  with_file_line{
132  void format_error_message()const override{
133  std::snprintf(error_message_buffer, sizeof(error_message_buffer),
134  "Line number %d in file \"%s\" exceeds the maximum length of 2^24-1."
135  , file_line, file_name);
136  }
137  };
138  }
139 
// Abstract source of raw bytes consumed by the line reader.
class ByteSourceBase{
public:
    virtual ~ByteSourceBase() = default;

    // Stores up to `size` bytes in `buffer` and returns how many were stored.
    // The readers in this file treat a return value of 0 as end of input.
    virtual int read(char*buffer, int size) = 0;
};
145 
146  namespace detail{
147 
148  class OwningStdIOByteSourceBase : public ByteSourceBase{
149  public:
150  explicit OwningStdIOByteSourceBase(FILE*file):file(file){
151  // Tell the std library that we want to do the buffering ourself.
152  std::setvbuf(file, 0, _IONBF, 0);
153  }
154 
155  int read(char*buffer, int size){
156  return std::fread(buffer, 1, size, file);
157  }
158 
159  ~OwningStdIOByteSourceBase(){
160  std::fclose(file);
161  }
162 
163  private:
164  FILE*file;
165  };
166 
167  class NonOwningIStreamByteSource : public ByteSourceBase{
168  public:
169  explicit NonOwningIStreamByteSource(std::istream&in):in(in){}
170 
171  int read(char*buffer, int size){
172  in.read(buffer, size);
173  return in.gcount();
174  }
175 
176  ~NonOwningIStreamByteSource(){}
177 
178  private:
179  std::istream&in;
180  };
181 
182  class NonOwningStringByteSource : public ByteSourceBase{
183  public:
184  NonOwningStringByteSource(const char*str, long long size):str(str), remaining_byte_count(size){}
185 
186  int read(char*buffer, int desired_byte_count){
187  int to_copy_byte_count = desired_byte_count;
188  if(remaining_byte_count < to_copy_byte_count)
189  to_copy_byte_count = remaining_byte_count;
190  std::memcpy(buffer, str, to_copy_byte_count);
191  remaining_byte_count -= to_copy_byte_count;
192  str += to_copy_byte_count;
193  return to_copy_byte_count;
194  }
195 
196  ~NonOwningStringByteSource(){}
197 
198  private:
199  const char*str;
200  long long remaining_byte_count;
201  };
202 
203  #ifndef CSV_IO_NO_THREAD
204  class AsynchronousReader{
205  public:
206  void init(std::unique_ptr<ByteSourceBase>arg_byte_source){
207  std::unique_lock<std::mutex>guard(lock);
208  byte_source = std::move(arg_byte_source);
209  desired_byte_count = -1;
210  termination_requested = false;
211  worker = std::thread(
212  [&]{
213  std::unique_lock<std::mutex>guard(lock);
214  try{
215  for(;;){
216  read_requested_condition.wait(
217  guard,
218  [&]{
219  return desired_byte_count != -1 || termination_requested;
220  }
221  );
222  if(termination_requested)
223  return;
224 
225  read_byte_count = byte_source->read(buffer, desired_byte_count);
226  desired_byte_count = -1;
227  if(read_byte_count == 0)
228  break;
229  read_finished_condition.notify_one();
230  }
231  }catch(...){
232  read_error = std::current_exception();
233  }
234  read_finished_condition.notify_one();
235  }
236  );
237  }
238 
239  bool is_valid()const{
240  return byte_source != nullptr;
241  }
242 
243  void start_read(char*arg_buffer, int arg_desired_byte_count){
244  std::unique_lock<std::mutex>guard(lock);
245  buffer = arg_buffer;
246  desired_byte_count = arg_desired_byte_count;
247  read_byte_count = -1;
248  read_requested_condition.notify_one();
249  }
250 
251  int finish_read(){
252  std::unique_lock<std::mutex>guard(lock);
253  read_finished_condition.wait(
254  guard,
255  [&]{
256  return read_byte_count != -1 || read_error;
257  }
258  );
259  if(read_error)
260  std::rethrow_exception(read_error);
261  else
262  return read_byte_count;
263  }
264 
265  ~AsynchronousReader(){
266  if(byte_source != nullptr){
267  {
268  std::unique_lock<std::mutex>guard(lock);
269  termination_requested = true;
270  }
271  read_requested_condition.notify_one();
272  worker.join();
273  }
274  }
275 
276  private:
277  std::unique_ptr<ByteSourceBase>byte_source;
278 
279  std::thread worker;
280 
281  bool termination_requested;
282  std::exception_ptr read_error;
283  char*buffer;
284  int desired_byte_count;
285  int read_byte_count;
286 
287  std::mutex lock;
288  std::condition_variable read_finished_condition;
289  std::condition_variable read_requested_condition;
290  };
291  #endif
292 
293  class SynchronousReader{
294  public:
295  void init(std::unique_ptr<ByteSourceBase>arg_byte_source){
296  byte_source = std::move(arg_byte_source);
297  }
298 
299  bool is_valid()const{
300  return byte_source != nullptr;
301  }
302 
303  void start_read(char*arg_buffer, int arg_desired_byte_count){
304  buffer = arg_buffer;
305  desired_byte_count = arg_desired_byte_count;
306  }
307 
308  int finish_read(){
309  return byte_source->read(buffer, desired_byte_count);
310  }
311  private:
312  std::unique_ptr<ByteSourceBase>byte_source;
313  char*buffer;
314  int desired_byte_count;
315  };
316  }
317 
318  class LineReader{
319  private:
320  static const int block_len = 1<<20;
321  std::unique_ptr<char[]>buffer; // must be constructed before (and thus destructed after) the reader!
322  #ifdef CSV_IO_NO_THREAD
323  detail::SynchronousReader reader;
324  #else
325  detail::AsynchronousReader reader;
326  #endif
327  int data_begin;
328  int data_end;
329 
330  char file_name[error::max_file_name_length+1];
331  unsigned file_line;
332 
333  static std::unique_ptr<ByteSourceBase> open_file(const char*file_name){
334  // We open the file in binary mode as it makes no difference under *nix
335  // and under Windows we handle \r\n newlines ourself.
336  FILE*file = std::fopen(file_name, "rb");
337  if(file == 0){
338  int x = errno; // store errno as soon as possible, doing it after constructor call can fail.
339  error::can_not_open_file err;
340  err.set_errno(x);
341  err.set_file_name(file_name);
342  throw err;
343  }
344  return std::unique_ptr<ByteSourceBase>(new detail::OwningStdIOByteSourceBase(file));
345  }
346 
347  void init(std::unique_ptr<ByteSourceBase>byte_source){
348  file_line = 0;
349 
350  buffer = std::unique_ptr<char[]>(new char[3*block_len]);
351  data_begin = 0;
352  data_end = byte_source->read(buffer.get(), 2*block_len);
353 
354  // Ignore UTF-8 BOM
355  if(data_end >= 3 && buffer[0] == '\xEF' && buffer[1] == '\xBB' && buffer[2] == '\xBF')
356  data_begin = 3;
357 
358  if(data_end == 2*block_len){
359  reader.init(std::move(byte_source));
360  reader.start_read(buffer.get() + 2*block_len, block_len);
361  }
362  }
363 
364  public:
365  LineReader() = delete;
366  LineReader(const LineReader&) = delete;
367  LineReader&operator=(const LineReader&) = delete;
368 
369  explicit LineReader(const char*file_name){
370  set_file_name(file_name);
371  init(open_file(file_name));
372  }
373 
374  explicit LineReader(const std::string&file_name){
375  set_file_name(file_name.c_str());
376  init(open_file(file_name.c_str()));
377  }
378 
379  LineReader(const char*file_name, std::unique_ptr<ByteSourceBase>byte_source){
380  set_file_name(file_name);
381  init(std::move(byte_source));
382  }
383 
384  LineReader(const std::string&file_name, std::unique_ptr<ByteSourceBase>byte_source){
385  set_file_name(file_name.c_str());
386  init(std::move(byte_source));
387  }
388 
389  LineReader(const char*file_name, const char*data_begin, const char*data_end){
390  set_file_name(file_name);
391  init(std::unique_ptr<ByteSourceBase>(new detail::NonOwningStringByteSource(data_begin, data_end-data_begin)));
392  }
393 
394  LineReader(const std::string&file_name, const char*data_begin, const char*data_end){
395  set_file_name(file_name.c_str());
396  init(std::unique_ptr<ByteSourceBase>(new detail::NonOwningStringByteSource(data_begin, data_end-data_begin)));
397  }
398 
399  LineReader(const char*file_name, FILE*file){
400  set_file_name(file_name);
401  init(std::unique_ptr<ByteSourceBase>(new detail::OwningStdIOByteSourceBase(file)));
402  }
403 
404  LineReader(const std::string&file_name, FILE*file){
405  set_file_name(file_name.c_str());
406  init(std::unique_ptr<ByteSourceBase>(new detail::OwningStdIOByteSourceBase(file)));
407  }
408 
409  LineReader(const char*file_name, std::istream&in){
410  set_file_name(file_name);
411  init(std::unique_ptr<ByteSourceBase>(new detail::NonOwningIStreamByteSource(in)));
412  }
413 
414  LineReader(const std::string&file_name, std::istream&in){
415  set_file_name(file_name.c_str());
416  init(std::unique_ptr<ByteSourceBase>(new detail::NonOwningIStreamByteSource(in)));
417  }
418 
419  void set_file_name(const std::string&file_name){
420  set_file_name(file_name.c_str());
421  }
422 
423  void set_file_name(const char*file_name){
424  if(file_name != nullptr){
425  strncpy(this->file_name, file_name, sizeof(this->file_name));
426  this->file_name[sizeof(this->file_name)-1] = '\0';
427  }else{
428  this->file_name[0] = '\0';
429  }
430  }
431 
432  const char*get_truncated_file_name()const{
433  return file_name;
434  }
435 
436  void set_file_line(unsigned file_line){
437  this->file_line = file_line;
438  }
439 
440  unsigned get_file_line()const{
441  return file_line;
442  }
443 
444  char*next_line(){
445  if(data_begin == data_end)
446  return nullptr;
447 
448  ++file_line;
449 
450  assert(data_begin < data_end);
451  assert(data_end <= block_len*2);
452 
453  if(data_begin >= block_len){
454  std::memcpy(buffer.get(), buffer.get()+block_len, block_len);
455  data_begin -= block_len;
456  data_end -= block_len;
457  if(reader.is_valid())
458  {
459  data_end += reader.finish_read();
460  std::memcpy(buffer.get()+block_len, buffer.get()+2*block_len, block_len);
461  reader.start_read(buffer.get() + 2*block_len, block_len);
462  }
463  }
464 
465  int line_end = data_begin;
466  while(buffer[line_end] != '\n' && line_end != data_end){
467  ++line_end;
468  }
469 
470  if(line_end - data_begin + 1 > block_len){
471  error::line_length_limit_exceeded err;
472  err.set_file_name(file_name);
473  err.set_file_line(file_line);
474  throw err;
475  }
476 
477  if(buffer[line_end] == '\n' && line_end != data_end){
478  buffer[line_end] = '\0';
479  }else{
480  // some files are missing the newline at the end of the
481  // last line
482  ++data_end;
483  buffer[line_end] = '\0';
484  }
485 
486  // handle windows \r\n-line breaks
487  if(line_end != data_begin && buffer[line_end-1] == '\r')
488  buffer[line_end-1] = '\0';
489 
490  char*ret = buffer.get() + data_begin;
491  data_begin = line_end+1;
492  return ret;
493  }
494  };
495 
496 
498  // CSV //
500 
501  namespace error{
// Longest column name (without the terminating NUL) an error object stores.
const int max_column_name_length = 63;

// Mixin that gives an error a fixed-size, NUL-terminated copy of a column
// name. Longer names are cut off after max_column_name_length characters.
struct with_column_name{
    with_column_name(){
        std::memset(column_name, 0, max_column_name_length+1);
    }

    // Stores a (possibly truncated) copy; nullptr clears the stored name.
    void set_column_name(const char*column_name){
        if(column_name == nullptr){
            this->column_name[0] = '\0';
            return;
        }
        std::strncpy(this->column_name, column_name, max_column_name_length);
        this->column_name[max_column_name_length] = '\0';
    }

    char column_name[max_column_name_length+1];
};
519 
520 
// Longest cell text (without the terminating NUL) an error object stores.
const int max_column_content_length = 63;

// Mixin that gives an error a fixed-size, NUL-terminated copy of the
// offending cell text. Longer text is cut off after
// max_column_content_length characters.
struct with_column_content{
    with_column_content(){
        std::memset(column_content, 0, max_column_content_length+1);
    }

    // Stores a (possibly truncated) copy; nullptr clears the stored text.
    void set_column_content(const char*column_content){
        if(column_content == nullptr){
            this->column_content[0] = '\0';
            return;
        }
        std::strncpy(this->column_content, column_content, max_column_content_length);
        this->column_content[max_column_content_length] = '\0';
    }

    char column_content[max_column_content_length+1];
};
539 
540 
541  struct extra_column_in_header :
542  base,
543  with_file_name,
544  with_column_name{
545  void format_error_message()const override{
546  std::snprintf(error_message_buffer, sizeof(error_message_buffer),
547  R"(Extra column "%s" in header of file "%s".)"
548  , column_name, file_name);
549  }
550  };
551 
552  struct missing_column_in_header :
553  base,
554  with_file_name,
555  with_column_name{
556  void format_error_message()const override{
557  std::snprintf(error_message_buffer, sizeof(error_message_buffer),
558  R"(Missing column "%s" in header of file "%s".)"
559  , column_name, file_name);
560  }
561  };
562 
563  struct duplicated_column_in_header :
564  base,
565  with_file_name,
566  with_column_name{
567  void format_error_message()const override{
568  std::snprintf(error_message_buffer, sizeof(error_message_buffer),
569  R"(Duplicated column "%s" in header of file "%s".)"
570  , column_name, file_name);
571  }
572  };
573 
574  struct header_missing :
575  base,
576  with_file_name{
577  void format_error_message()const override{
578  std::snprintf(error_message_buffer, sizeof(error_message_buffer),
579  "Header missing in file \"%s\"."
580  , file_name);
581  }
582  };
583 
584  struct too_few_columns :
585  base,
586  with_file_name,
587  with_file_line{
588  void format_error_message()const override{
589  std::snprintf(error_message_buffer, sizeof(error_message_buffer),
590  "Too few columns in line %d in file \"%s\"."
591  , file_line, file_name);
592  }
593  };
594 
595  struct too_many_columns :
596  base,
597  with_file_name,
598  with_file_line{
599  void format_error_message()const override{
600  std::snprintf(error_message_buffer, sizeof(error_message_buffer),
601  "Too many columns in line %d in file \"%s\"."
602  , file_line, file_name);
603  }
604  };
605 
606  struct escaped_string_not_closed :
607  base,
608  with_file_name,
609  with_file_line{
610  void format_error_message()const override{
611  std::snprintf(error_message_buffer, sizeof(error_message_buffer),
612  "Escaped string was not closed in line %d in file \"%s\"."
613  , file_line, file_name);
614  }
615  };
616 
617  struct integer_must_be_positive :
618  base,
619  with_file_name,
620  with_file_line,
621  with_column_name,
622  with_column_content{
623  void format_error_message()const override{
624  std::snprintf(error_message_buffer, sizeof(error_message_buffer),
625  R"(The integer "%s" must be positive or 0 in column "%s" in file "%s" in line "%d".)"
626  , column_content, column_name, file_name, file_line);
627  }
628  };
629 
630  struct no_digit :
631  base,
632  with_file_name,
633  with_file_line,
634  with_column_name,
635  with_column_content{
636  void format_error_message()const override{
637  std::snprintf(error_message_buffer, sizeof(error_message_buffer),
638  R"(The integer "%s" contains an invalid digit in column "%s" in file "%s" in line "%d".)"
639  , column_content, column_name, file_name, file_line);
640  }
641  };
642 
643  struct integer_overflow :
644  base,
645  with_file_name,
646  with_file_line,
647  with_column_name,
648  with_column_content{
649  void format_error_message()const override{
650  std::snprintf(error_message_buffer, sizeof(error_message_buffer),
651  R"(The integer "%s" overflows in column "%s" in file "%s" in line "%d".)"
652  , column_content, column_name, file_name, file_line);
653  }
654  };
655 
656  struct integer_underflow :
657  base,
658  with_file_name,
659  with_file_line,
660  with_column_name,
661  with_column_content{
662  void format_error_message()const override{
663  std::snprintf(error_message_buffer, sizeof(error_message_buffer),
664  R"(The integer "%s" underflows in column "%s" in file "%s" in line "%d".)"
665  , column_content, column_name, file_name, file_line);
666  }
667  };
668 
669  struct invalid_single_character :
670  base,
671  with_file_name,
672  with_file_line,
673  with_column_name,
674  with_column_content{
675  void format_error_message()const override{
676  std::snprintf(error_message_buffer, sizeof(error_message_buffer),
677  R"(The content "%s" of column "%s" in file "%s" in line "%d" is not a single character.)"
678  , column_content, column_name, file_name, file_line);
679  }
680  };
681  }
682 
// Bit flags that tell header parsing which header mismatches to tolerate.
// The flags are tested with bitwise AND, so they can be OR-ed together.
using ignore_column = unsigned int;
static const ignore_column ignore_no_column      = 0u;       // tolerate nothing
static const ignore_column ignore_extra_column   = 1u << 0;  // unknown header columns are skipped
static const ignore_column ignore_missing_column = 1u << 1;  // expected columns may be absent
687 
// Trim policy: strips every character listed in trim_char_list from both
// ends of a field.
template<char ... trim_char_list>
struct trim_chars{
private:
    // Recursion anchor: no candidates left, so nothing matched.
    constexpr static bool matches_any(char){
        return false;
    }

    // True when `c` equals `candidate` or any of the remaining candidates.
    template<class ...Rest>
    constexpr static bool matches_any(char c, char candidate, Rest...rest){
        return c == candidate ? true : matches_any(c, rest...);
    }

public:
    // Narrows [str_begin, str_end) so that it neither starts nor ends with a
    // trim character, then writes a NUL terminator at the new end.
    static void trim(char*&str_begin, char*&str_end){
        for(; str_begin != str_end; ++str_begin)
            if(!matches_any(*str_begin, trim_char_list...))
                break;
        for(; str_begin != str_end; --str_end)
            if(!matches_any(str_end[-1], trim_char_list...))
                break;
        *str_end = '\0';
    }
};
709 
710 
// Comment policy that never classifies a line as a comment.
struct no_comment{
    static bool is_comment(const char* /*line*/){
        return false;
    }
};
716 
// Comment policy: a line is a comment when its very first character is one
// of comment_start_char_list.
template<char ... comment_start_char_list>
struct single_line_comment{
private:
    // Recursion anchor: no start characters left to compare against.
    constexpr static bool starts_comment(char){
        return false;
    }

    // True when `c` equals `first` or any of the remaining start characters.
    template<class ...Rest>
    constexpr static bool starts_comment(char c, char first, Rest...rest){
        return c == first ? true : starts_comment(c, rest...);
    }

public:
    static bool is_comment(const char*line){
        return starts_comment(line[0], comment_start_char_list...);
    }
};
735 
// Comment policy: a line is a comment when it is empty or consists only of
// spaces and tabs.
struct empty_line_comment{
    static bool is_comment(const char*line){
        // Skip leading blanks; the line is "empty" if nothing else follows.
        const char*cursor = line;
        while(*cursor == ' ' || *cursor == '\t')
            ++cursor;
        return *cursor == '\0';
    }
};
748 
749  template<char ... comment_start_char_list>
750  struct single_and_empty_line_comment{
751  static bool is_comment(const char*line){
752  return single_line_comment<comment_start_char_list...>::is_comment(line) || empty_line_comment::is_comment(line);
753  }
754  };
755 
// Quote policy without quoting: a field simply ends at the next `sep`
// character or at the end of the line.
template<char sep>
struct no_quote_escape{
    // Returns a pointer to the separator or NUL that ends the field.
    static const char*find_next_column_end(const char*col_begin){
        const char*cursor = col_begin;
        for(; *cursor != '\0' && *cursor != sep; ++cursor){}
        return cursor;
    }

    // Nothing to unescape when quoting is not supported.
    static void unescape(char*&, char*&){}
};
768 
769  template<char sep, char quote>
770  struct double_quote_escape{
771  static const char*find_next_column_end(const char*col_begin){
772  while(*col_begin != sep && *col_begin != '\0')
773  if(*col_begin != quote)
774  ++col_begin;
775  else{
776  do{
777  ++col_begin;
778  while(*col_begin != quote){
779  if(*col_begin == '\0')
780  throw error::escaped_string_not_closed();
781  ++col_begin;
782  }
783  ++col_begin;
784  }while(*col_begin == quote);
785  }
786  return col_begin;
787  }
788 
789  static void unescape(char*&col_begin, char*&col_end){
790  if(col_end - col_begin >= 2){
791  if(*col_begin == quote && *(col_end-1) == quote){
792  ++col_begin;
793  --col_end;
794  char*out = col_begin;
795  for(char*in = col_begin; in!=col_end; ++in){
796  if(*in == quote && (in+1) != col_end && *(in+1) == quote){
797  ++in;
798  }
799  *out = *in;
800  ++out;
801  }
802  col_end = out;
803  *col_end = '\0';
804  }
805  }
806 
807  }
808  };
809 
810  struct throw_on_overflow{
811  template<class T>
812  static void on_overflow(T&){
813  throw error::integer_overflow();
814  }
815 
816  template<class T>
817  static void on_underflow(T&){
818  throw error::integer_underflow();
819  }
820  };
821 
// Overflow policy that does nothing: the value handed to the hooks is kept
// exactly as it is.
struct ignore_overflow{
    template<class T>
    static void on_overflow(T& /*x*/){
    }

    template<class T>
    static void on_underflow(T& /*x*/){
    }
};
829 
// Overflow policy that saturates: the value becomes the largest or smallest
// value std::numeric_limits reports for its type.
struct set_to_max_on_overflow{
    template<class T>
    static void on_overflow(T&x){
        using limits = std::numeric_limits<T>;
        x = limits::max();
    }

    template<class T>
    static void on_underflow(T&x){
        using limits = std::numeric_limits<T>;
        x = limits::min();
    }
};
841 
842 
843  namespace detail{
// Cuts the first field off `line`.
//
// On return [col_begin, col_end) is the field, NUL-terminated in place.
// `line` points behind the separator that ended the field, or is nullptr
// when the field was the last one on the line.
template<class quote_policy>
void chop_next_column(char*&line, char*&col_begin, char*&col_end){
    assert(line != nullptr);

    col_begin = line;
    const char*const field_end = quote_policy::find_next_column_end(col_begin);
    // Adding the offset to the non-const pointer removes the constness.
    col_end = col_begin + (field_end - col_begin);

    const bool is_last_column = (*col_end == '\0');
    if(is_last_column){
        line = nullptr;
        return;
    }
    *col_end = '\0';
    line = col_end + 1;
}
861 
862  template<class trim_policy, class quote_policy>
863  void parse_line(
864  char*line,
865  char**sorted_col,
866  const std::vector<int>&col_order
867  ){
868  for (int i : col_order) {
869  if(line == nullptr)
870  throw ::io::error::too_few_columns();
871  char*col_begin, *col_end;
872  chop_next_column<quote_policy>(line, col_begin, col_end);
873 
874  if (i != -1) {
875  trim_policy::trim(col_begin, col_end);
876  quote_policy::unescape(col_begin, col_end);
877 
878  sorted_col[i] = col_begin;
879  }
880  }
881  if(line != nullptr)
882  throw ::io::error::too_many_columns();
883  }
884 
885  template<unsigned column_count, class trim_policy, class quote_policy>
886  void parse_header_line(
887  char*line,
888  std::vector<int>&col_order,
889  const std::string*col_name,
890  ignore_column ignore_policy
891  ){
892  col_order.clear();
893 
894  bool found[column_count];
895  std::fill(found, found + column_count, false);
896  while(line){
897  char*col_begin,*col_end;
898  chop_next_column<quote_policy>(line, col_begin, col_end);
899 
900  trim_policy::trim(col_begin, col_end);
901  quote_policy::unescape(col_begin, col_end);
902 
903  for(unsigned i=0; i<column_count; ++i)
904  if(col_begin == col_name[i]){
905  if(found[i]){
906  error::duplicated_column_in_header err;
907  err.set_column_name(col_begin);
908  throw err;
909  }
910  found[i] = true;
911  col_order.push_back(i);
912  col_begin = 0;
913  break;
914  }
915  if(col_begin){
916  if(ignore_policy & ::io::ignore_extra_column)
917  col_order.push_back(-1);
918  else{
919  error::extra_column_in_header err;
920  err.set_column_name(col_begin);
921  throw err;
922  }
923  }
924  }
925  if(!(ignore_policy & ::io::ignore_missing_column)){
926  for(unsigned i=0; i<column_count; ++i){
927  if(!found[i]){
928  error::missing_column_in_header err;
929  err.set_column_name(col_name[i].c_str());
930  throw err;
931  }
932  }
933  }
934  }
935 
936  template<class overflow_policy>
937  void parse(char*col, char &x){
938  if(!*col)
939  throw error::invalid_single_character();
940  x = *col;
941  ++col;
942  if(*col)
943  throw error::invalid_single_character();
944  }
945 
// Text cells need no conversion. The std::string overload copies the cell.
template<class overflow_policy>
void parse(char*col, std::string&x){
    x.assign(col);
}

// The pointer overloads hand out the cell's address itself, so the result
// is only valid as long as the memory `col` points into.
template<class overflow_policy>
void parse(char*col, const char*&x){
    x = static_cast<const char*>(col);
}

template<class overflow_policy>
void parse(char*col, char*&x){
    x = col;
}
960 
961  template<class overflow_policy, class T>
962  void parse_unsigned_integer(const char*col, T&x){
963  x = 0;
964  while(*col != '\0'){
965  if('0' <= *col && *col <= '9'){
966  T y = *col - '0';
967  if(x > (std::numeric_limits<T>::max()-y)/10){
968  overflow_policy::on_overflow(x);
969  return;
970  }
971  x = 10*x+y;
972  }else
973  throw error::no_digit();
974  ++col;
975  }
976  }
977 
978  template<class overflow_policy>void parse(char*col, unsigned char &x)
979  {parse_unsigned_integer<overflow_policy>(col, x);}
980  template<class overflow_policy>void parse(char*col, unsigned short &x)
981  {parse_unsigned_integer<overflow_policy>(col, x);}
982  template<class overflow_policy>void parse(char*col, unsigned int &x)
983  {parse_unsigned_integer<overflow_policy>(col, x);}
984  template<class overflow_policy>void parse(char*col, unsigned long &x)
985  {parse_unsigned_integer<overflow_policy>(col, x);}
986  template<class overflow_policy>void parse(char*col, unsigned long long &x)
987  {parse_unsigned_integer<overflow_policy>(col, x);}
988 
989  template<class overflow_policy, class T>
990  void parse_signed_integer(const char*col, T&x){
991  if(*col == '-'){
992  ++col;
993 
994  x = 0;
995  while(*col != '\0'){
996  if('0' <= *col && *col <= '9'){
997  T y = *col - '0';
998  if(x < (std::numeric_limits<T>::min()+y)/10){
999  overflow_policy::on_underflow(x);
1000  return;
1001  }
1002  x = 10*x-y;
1003  }else
1004  throw error::no_digit();
1005  ++col;
1006  }
1007  return;
1008  }else if(*col == '+')
1009  ++col;
1010  parse_unsigned_integer<overflow_policy>(col, x);
1011  }
1012 
1013  template<class overflow_policy>void parse(char*col, signed char &x)
1014  {parse_signed_integer<overflow_policy>(col, x);}
1015  template<class overflow_policy>void parse(char*col, signed short &x)
1016  {parse_signed_integer<overflow_policy>(col, x);}
1017  template<class overflow_policy>void parse(char*col, signed int &x)
1018  {parse_signed_integer<overflow_policy>(col, x);}
1019  template<class overflow_policy>void parse(char*col, signed long &x)
1020  {parse_signed_integer<overflow_policy>(col, x);}
1021  template<class overflow_policy>void parse(char*col, signed long long &x)
1022  {parse_signed_integer<overflow_policy>(col, x);}
1023 
1024  template<class T>
1025  void parse_float(const char*col, T&x){
1026  bool is_neg = false;
1027  if(*col == '-'){
1028  is_neg = true;
1029  ++col;
1030  }else if(*col == '+')
1031  ++col;
1032 
1033  x = 0;
1034  while('0' <= *col && *col <= '9'){
1035  int y = *col - '0';
1036  x *= 10;
1037  x += y;
1038  ++col;
1039  }
1040 
1041  if(*col == '.'|| *col == ','){
1042  ++col;
1043  T pos = 1;
1044  while('0' <= *col && *col <= '9'){
1045  pos /= 10;
1046  int y = *col - '0';
1047  ++col;
1048  x += y*pos;
1049  }
1050  }
1051 
1052  if(*col == 'e' || *col == 'E'){
1053  ++col;
1054  int e;
1055 
1056  parse_signed_integer<set_to_max_on_overflow>(col, e);
1057 
1058  if(e != 0){
1059  T base;
1060  if(e < 0){
1061  base = T(0.1);
1062  e = -e;
1063  }else{
1064  base = T(10);
1065  }
1066 
1067  while(e != 1){
1068  if((e & 1) == 0){
1069  base = base*base;
1070  e >>= 1;
1071  }else{
1072  x *= base;
1073  --e;
1074  }
1075  }
1076  x *= base;
1077  }
1078  }else{
1079  if(*col != '\0')
1080  throw error::no_digit();
1081  }
1082 
1083  if(is_neg)
1084  x = -x;
1085  }
1086 
1087  template<class overflow_policy> void parse(char*col, float&x) { parse_float(col, x); }
1088  template<class overflow_policy> void parse(char*col, double&x) { parse_float(col, x); }
1089  template<class overflow_policy> void parse(char*col, long double&x) { parse_float(col, x); }
1090 
// Catch-all overload for column types without a dedicated parser.
// Instantiating it fails the build with an explanatory message.
template<class overflow_policy, class T>
void parse(char*col, T&x){
    // Reference both parameters so that no unused-variable warning appears.
    static_cast<void>(col);
    static_cast<void>(x);
    // The condition depends on T so that it is only evaluated when the
    // template is instantiated; a plain "false" would already be rejected
    // while the template is being read.
    static_assert(sizeof(T)!=sizeof(T),
        "Can not parse this type. Only buildin integrals, floats, char, char*, const char* and std::string are supported");
}
1102 
1103  }
1104 
1105  template<unsigned column_count,
1106  class trim_policy = trim_chars<' ', '\t'>,
1107  class quote_policy = no_quote_escape<','>,
1108  class overflow_policy = throw_on_overflow,
1109  class comment_policy = no_comment
1110  >
1111  class CSVReader{
1112  private:
1113  LineReader in;
1114 
1115  char*row[column_count];
1116  std::string column_names[column_count];
1117 
1118  std::vector<int>col_order;
1119 
1120  template<class ...ColNames>
1121  void set_column_names(std::string s, ColNames...cols){
1122  column_names[column_count-sizeof...(ColNames)-1] = std::move(s);
1123  set_column_names(std::forward<ColNames>(cols)...);
1124  }
1125 
1126  void set_column_names(){}
1127 
1128 
1129  public:
1130  CSVReader() = delete;
1131  CSVReader(const CSVReader&) = delete;
1132  CSVReader&operator=(const CSVReader&);
1133 
1134  template<class ...Args>
1135  explicit CSVReader(Args&&...args):in(std::forward<Args>(args)...){
1136  std::fill(row, row+column_count, nullptr);
1137  col_order.resize(column_count);
1138  for(unsigned i=0; i<column_count; ++i)
1139  col_order[i] = i;
1140  for(unsigned i=1; i<=column_count; ++i)
1141  column_names[i-1] = "col"+std::to_string(i);
1142  }
1143 
1144  char*next_line(){
1145  return in.next_line();
1146  }
1147 
1148  template<class ...ColNames>
1149  void read_header(ignore_column ignore_policy, ColNames...cols){
1150  static_assert(sizeof...(ColNames)>=column_count, "not enough column names specified");
1151  static_assert(sizeof...(ColNames)<=column_count, "too many column names specified");
1152  try{
1153  set_column_names(std::forward<ColNames>(cols)...);
1154 
1155  char*line;
1156  do{
1157  line = in.next_line();
1158  if(!line)
1159  throw error::header_missing();
1160  }while(comment_policy::is_comment(line));
1161 
1162  detail::parse_header_line
1163  <column_count, trim_policy, quote_policy>
1164  (line, col_order, column_names, ignore_policy);
1165  }catch(error::with_file_name&err){
1166  err.set_file_name(in.get_truncated_file_name());
1167  throw;
1168  }
1169  }
1170 
1171  template<class ...ColNames>
1172  void set_header(ColNames...cols){
1173  static_assert(sizeof...(ColNames)>=column_count,
1174  "not enough column names specified");
1175  static_assert(sizeof...(ColNames)<=column_count,
1176  "too many column names specified");
1177  set_column_names(std::forward<ColNames>(cols)...);
1178  std::fill(row, row+column_count, nullptr);
1179  col_order.resize(column_count);
1180  for(unsigned i=0; i<column_count; ++i)
1181  col_order[i] = i;
1182  }
1183 
1184  bool has_column(const std::string&name) const {
1185  return col_order.end() != std::find(
1186  col_order.begin(), col_order.end(),
1187  std::find(std::begin(column_names), std::end(column_names), name)
1188  - std::begin(column_names));
1189  }
1190 
1191  void set_file_name(const std::string&file_name){
1192  in.set_file_name(file_name);
1193  }
1194 
1195  void set_file_name(const char*file_name){
1196  in.set_file_name(file_name);
1197  }
1198 
1199  const char*get_truncated_file_name()const{
1200  return in.get_truncated_file_name();
1201  }
1202 
1203  void set_file_line(unsigned file_line){
1204  in.set_file_line(file_line);
1205  }
1206 
1207  unsigned get_file_line()const{
1208  return in.get_file_line();
1209  }
1210 
1211  private:
1212  void parse_helper(std::size_t){}
1213 
1214  template<class T, class ...ColType>
1215  void parse_helper(std::size_t r, T&t, ColType&...cols){
1216  if(row[r]){
1217  try{
1218  try{
1219  ::io::detail::parse<overflow_policy>(row[r], t);
1220  }catch(error::with_column_content&err){
1221  err.set_column_content(row[r]);
1222  throw;
1223  }
1224  }catch(error::with_column_name&err){
1225  err.set_column_name(column_names[r].c_str());
1226  throw;
1227  }
1228  }
1229  parse_helper(r+1, cols...);
1230  }
1231 
1232 
1233  public:
1234  template<class ...ColType>
1235  bool read_row(ColType& ...cols){
1236  static_assert(sizeof...(ColType)>=column_count,
1237  "not enough columns specified");
1238  static_assert(sizeof...(ColType)<=column_count,
1239  "too many columns specified");
1240  try{
1241  try{
1242 
1243  char*line;
1244  do{
1245  line = in.next_line();
1246  if(!line)
1247  return false;
1248  }while(comment_policy::is_comment(line));
1249 
1250  detail::parse_line<trim_policy, quote_policy>
1251  (line, row, col_order);
1252 
1253  parse_helper(0, cols...);
1254  }catch(error::with_file_name&err){
1255  err.set_file_name(in.get_truncated_file_name());
1256  throw;
1257  }
1258  }catch(error::with_file_line&err){
1259  err.set_file_line(in.get_file_line());
1260  throw;
1261  }
1262 
1263  return true;
1264  }
1265  };
1266 }
1267 #endif
1268 
Definition: csv.h:52