22 #ifndef SINGA_UTILS_DATA_SHARD_H_
23 #define SINGA_UTILS_DATA_SHARD_H_
25 #include <google/protobuf/message.h>
28 #include <unordered_set>
// Initializes the shard object.
//   folder - taken by const reference; presumably the directory that holds
//            the shard data (TODO confirm against the implementation).
//   mode   - integer mode selector; the accepted values are not visible in
//            this listing.
70 DataShard(
const std::string& folder,
int mode);
// Constructor overload that takes an extra `capacity` argument in addition
// to `folder` and `mode`.
// NOTE(review): the meaning and units of `capacity` are not shown here
// (possibly a buffer size) - confirm against the implementation.
71 DataShard(
const std::string& folder,
int mode,
int capacity);
// Reads the next tuple from the shard.
//   key - pointer through which the tuple's key is presumably returned.
//   val - pointer to a protobuf Message that presumably receives the tuple.
// NOTE(review): the bool result presumably signals whether a tuple was
// read - confirm against the implementation.
82 bool Next(std::string* key, google::protobuf::Message* val);
// Overload of Next whose value argument is a std::string pointer instead of
// a protobuf Message pointer.
// NOTE(review): presumably yields the tuple as a raw string and returns
// whether a tuple was read - confirm against the implementation.
91 bool Next(std::string* key, std::string* val);
// Appends one tuple to the shard.
//   key   - key of the tuple, taken by const reference.
//   tuple - protobuf Message to append, taken by const reference.
// NOTE(review): the bool result presumably signals whether the tuple was
// appended - confirm against the implementation.
99 bool Insert(
const std::string& key,
const google::protobuf::Message& tuple);
// Overload of Insert whose tuple argument is a std::string instead of a
// protobuf Message.
// NOTE(review): presumably appends the string as the tuple payload and
// returns whether it was appended - confirm against the implementation.
107 bool Insert(
const std::string& key,
const std::string& tuple);
// Accessor for the shard's stored path: hands back a copy of path_ by value.
127 inline std::string path() {
  std::string shard_path(path_);
  return shard_path;
}
// Key-only overload of Next: takes just a key pointer and returns an int.
// NOTE(review): the meaning of the int result is not visible in this
// listing (unlike the two-argument overloads, which return bool) - confirm
// against the implementation.
136 int Next(std::string* key);
// Backing field for path(); starts out as an empty string.
154 std::string path_ =
"";
// Set of distinct string keys.
// NOTE(review): presumably the keys already present in the shard, e.g. for
// duplicate detection - confirm against the implementation.
158 std::unordered_set<std::string> keys_;
// Raw char pointer, null until assigned.
// NOTE(review): presumably the I/O buffer; allocation and ownership are
// not visible in this listing - confirm against the implementation.
160 char* buf_ =
nullptr;
171 #endif // SINGA_UTILS_DATA_SHARD_H_
bool Next(std::string *key, google::protobuf::Message *val)
Read the next tuple from the shard.
bool Insert(const std::string &key, const google::protobuf::Message &tuple)
Append one tuple to the shard.
DataShard(const std::string &folder, int mode)
Initialize the shard object.
int Count()
Iterate through all tuples to count the total number of tuples.
std::string path()
Definition: data_shard.h:127
Data shard stores training/validation/test tuples.
Definition: data_shard.h:51
int PrepareForAppend(const std::string &path)
Set up the disk pointer at the right position for appending, in case the previous write crashed...
bool PrepareNextField(int size)
Read data from disk if the current data in the buffer is not a full field.
void Flush()
Flush buffered data to disk.
void SeekToFirst()
Move the read pointer to the head of the shard file.