Apache Singa
A General Distributed Deep Learning Library
reader.h
1 
19 #ifndef SINGA_IO_READER_H_
20 #define SINGA_IO_READER_H_
21 
22 #include <cstring>
23 #include <fstream>
24 #include <string>
25 #include "singa/singa_config.h"
26 
27 #ifdef USE_LMDB
28 #include <lmdb.h>
29 #include <sys/stat.h>
30 #include <vector>
31 #endif // USE_LMDB
32 
33 namespace singa {
34 namespace io {
35 
36 using std::string;
37 
/// General Reader that provides functions for reading tuples.
///
/// This is a pure interface: every operation other than the destructor is
/// pure virtual and must be supplied by a concrete reader.
class Reader {
 public:
  /// Virtual so that a concrete reader is destroyed correctly through a
  /// Reader pointer. In case users forget to call Close() to release
  /// resources (e.g., memory), a subclass can release them in its destructor.
  virtual ~Reader() = default;

  /// Open the storage identified by `path`, which could be a file path, a
  /// database connection, or an hdfs path.
  /// NOTE(review): the bool result presumably reports success -- confirm
  /// against the concrete implementations.
  virtual bool Open(const std::string& path) = 0;

  /// Release resources.
  virtual void Close() = 0;

  /// Read a tuple.
  /// NOTE(review): `key` and `value` are presumably output parameters and the
  /// bool result presumably reports whether a tuple was read -- confirm
  /// against the concrete implementations.
  virtual bool Read(std::string* key, std::string* value) = 0;

  /// Iterate through all tuples to get the total number of tuples.
  virtual int Count() = 0;

  /// Seek to the first tuple when the cursor arrives at the end of the file.
  virtual void SeekToFirst() = 0;
};
68 
70 class BinFileReader : public Reader {
71  public:
72  ~BinFileReader() { Close(); }
74  bool Open(const std::string& path) override;
76  bool Open(const std::string& path, int capacity);
78  void Close() override;
80  bool Read(std::string* key, std::string* value) override;
82  int Count() override;
84  void SeekToFirst() override;
86  inline std::string path() { return path_; }
87 
88  protected:
90  bool OpenFile();
93  bool ReadField(std::string* content);
96  bool PrepareNextField(int size);
97 
98  private:
100  std::string path_ = "";
102  std::ifstream fdat_;
104  char* buf_ = nullptr;
106  int offset_ = 0;
108  int capacity_ = 10485760;
110  int bufsize_ = 0;
112  const char kMagicWord[2] = {'s', 'g'};
113 };
114 
116 class TextFileReader : public Reader {
117  public:
118  ~TextFileReader() { Close(); }
120  bool Open(const std::string& path) override;
122  void Close() override;
124  bool Read(std::string* key, std::string* value) override;
126  int Count() override;
128  void SeekToFirst() override;
130  inline std::string path() { return path_; }
131 
132  private:
134  std::string path_ = "";
136  std::ifstream fdat_;
138  int lineNo_ = 0;
139 };
140 
141 #ifdef USE_LMDB
142 class LMDBReader : public Reader {
144  public:
145  ~LMDBReader() { Close(); }
147  bool Open(const std::string& path) override;
149  void Close() override;
151  bool Read(std::string* key, std::string* value) override;
153  int Count() override;
155  void SeekToFirst() override;
157  inline std::string path() { return path_; }
159  inline bool valid() { return valid_; }
160 
161  protected:
163  void Seek(MDB_cursor_op op);
164  inline void MDB_CHECK(int mdb_status);
165 
166  private:
168  std::string path_ = "";
170  MDB_env* mdb_env_ = nullptr;
172  MDB_dbi mdb_dbi_;
174  MDB_txn* mdb_txn_ = nullptr;
176  MDB_cursor* mdb_cursor_ = nullptr;
178  MDB_val mdb_key_, mdb_value_;
180  bool valid_;
182  bool first_;
183 };
184 #endif // USE_LMDB
185 } // namespace io
186 } // namespace singa
187 
188 #endif // SINGA_IO_READER_H_
virtual void SeekToFirst()=0
Seek to the first tuple when the cursor arrives at the end of the file.
std::string path()
return path to binary file
Definition: reader.h:86
std::string path()
return path to text file
Definition: reader.h:130
General Reader that provides functions for reading tuples.
Definition: reader.h:41
BinFileReader reads tuples from a binary file containing key-value pairs.
Definition: reader.h:70
virtual ~Reader()
In case users forget to call Close() to release resources (e.g., memory), you can release them here.
Definition: reader.h:45
virtual bool Open(const std::string &path)=0
path is the path to the storage, could be a file path, database connection, or hdfs path...
TextFileReader reads tuples from a CSV file.
Definition: reader.h:116
virtual int Count()=0
Iterate through all tuples to get the total number of tuples.
virtual bool Read(std::string *key, std::string *value)=0
Read a tuple.
virtual void Close()=0
Release resources.
Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements...
Definition: common.h:48