1 #ifndef INCLUDE_UTILS_SHARD_H_
2 #define INCLUDE_UTILS_SHARD_H_
4 #include <google/protobuf/message.h>
7 #include <unordered_set>
10 using google::protobuf::Message;
/**
 * Init the shard object.
 *
 * @param folder   taken by value; presumably the directory that holds the
 *                 shard file -- TODO confirm against the implementation.
 * @param mode     single-character mode selector. The class documentation
 *                 mentions a read-only mode (used in training) and an append
 *                 mode (used when a previous creation crashed); the concrete
 *                 values are not visible in this excerpt.
 * @param capacity defaults to 104857600 (= 100 * 1024 * 1024); presumably a
 *                 buffer size in bytes -- TODO confirm the unit.
 */
52 DataShard(std::string folder,
char mode,
int capacity=104857600);
/**
 * Read the next tuple from the shard.
 *
 * @param key pointer through which the tuple's key is passed.
 * @param val pointer to a protobuf Message for the tuple's value.
 * @return bool status; presumably false once no further tuple can be read
 *         -- TODO confirm against the implementation.
 */
62 bool Next(std::string *key, Message* val);
/**
 * Overload of Next() whose value argument is a std::string instead of a
 * protobuf Message.
 *
 * NOTE(review): presumably reads the next tuple and hands back the value as
 * a raw string -- confirm against the implementation.
 *
 * @param key pointer through which the tuple's key is passed.
 * @param val pointer to a string for the tuple's value.
 * @return bool status; meaning not visible here -- TODO confirm.
 */
70 bool Next(std::string *key, std::string* val);
/**
 * Append one tuple to the shard.
 *
 * @param key   key of the tuple.
 * @param tuple value of the tuple, given as a protobuf Message.
 * @return bool status; presumably true when the tuple was appended
 *         -- TODO confirm against the implementation.
 */
78 bool Insert(
const std::string& key,
const Message& tuple);
/**
 * Overload of Insert() whose value argument is a std::string instead of a
 * protobuf Message.
 *
 * NOTE(review): presumably appends one tuple whose value is already a raw
 * string -- confirm against the implementation.
 *
 * @param key   key of the tuple.
 * @param tuple value of the tuple, given as a string.
 * @return bool status; meaning not visible here -- TODO confirm.
 */
85 bool Insert(
const std::string& key,
const std::string& tuple);
/**
 * Key-only overload of Next().
 *
 * NOTE(review): unlike the two-argument overloads this one returns int, not
 * bool; what the integer represents is not visible in this excerpt --
 * confirm against the implementation before relying on it.
 *
 * @param key pointer through which a key is passed.
 */
114 int Next(std::string *key);
// Set of string keys held by the shard object.
// NOTE(review): presumably the keys already stored in the shard (e.g. to
// detect duplicates on Insert) -- confirm against the implementation.
134 std::unordered_set<std::string> keys_;
145 #endif // INCLUDE_UTILS_SHARD_H_
bool Insert(const std::string &key, const Message &tuple)
Append one tuple to the shard.
Read-only mode, used in training.
Definition: data_shard.h:37
Data shard stores training/validation/test tuples.
Definition: data_shard.h:33
int PrepareForAppend(std::string path)
Set up the disk pointer at the right position for appending, in case the previous write crashed...
const std::string path()
Definition: data_shard.h:104
bool Next(std::string *key, Message *val)
Read the next tuple from the shard.
bool PrepareNextField(int size)
Read data from disk if the current data in the buffer is not a full field.
const int Count()
Iterate through all tuples to get the total number of tuples.
void Flush()
Flush buffered data to disk.
Append mode, e.g. used when a previous creation attempt crashed.
Definition: data_shard.h:39
void SeekToFirst()
Move the read pointer to the head of the shard file.
DataShard(std::string folder, char mode, int capacity=104857600)
Initialize the shard object.