1 | |
|
2 | |
|
3 | |
|
4 | |
|
5 | |
|
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | |
|
13 | |
|
14 | |
|
15 | |
|
16 | |
|
17 | |
|
18 | |
package org.apache.giraph.io.gora.utils; |
19 | |
|
20 | |
import java.io.IOException; |
21 | |
import java.util.ArrayList; |
22 | |
import java.util.List; |
23 | |
|
24 | |
import org.apache.gora.mapreduce.GoraInputSplit; |
25 | |
import org.apache.gora.mapreduce.GoraMapReduceUtils; |
26 | |
import org.apache.gora.mapreduce.GoraRecordReader; |
27 | |
import org.apache.gora.persistency.Persistent; |
28 | |
import org.apache.gora.persistency.impl.PersistentBase; |
29 | |
import org.apache.gora.query.PartitionQuery; |
30 | |
import org.apache.gora.query.Query; |
31 | |
import org.apache.gora.query.impl.PartitionQueryImpl; |
32 | |
import org.apache.gora.store.DataStore; |
33 | |
import org.apache.gora.util.IOUtils; |
34 | |
import org.apache.hadoop.conf.Configuration; |
35 | |
import org.apache.hadoop.mapreduce.InputFormat; |
36 | |
import org.apache.hadoop.mapreduce.InputSplit; |
37 | |
import org.apache.hadoop.mapreduce.Job; |
38 | |
import org.apache.hadoop.mapreduce.JobContext; |
39 | |
import org.apache.hadoop.mapreduce.RecordReader; |
40 | |
import org.apache.hadoop.mapreduce.TaskAttemptContext; |
41 | |
|
42 | |
|
43 | |
|
44 | |
|
45 | |
|
46 | |
|
47 | |
|
48 | |
|
49 | |
|
50 | |
|
51 | |
|
52 | 0 | public class ExtraGoraInputFormat<K, T extends PersistentBase> |
53 | |
extends InputFormat<K, T> { |
54 | |
|
55 | |
|
56 | |
|
57 | |
|
58 | |
public static final String QUERY_KEY = "gora.inputformat.query"; |
59 | |
|
60 | |
|
61 | |
|
62 | |
|
63 | |
private DataStore<K, T> dataStore; |
64 | |
|
65 | |
|
66 | |
|
67 | |
|
68 | |
private Query<K, T> query; |
69 | |
|
70 | |
|
71 | |
|
72 | |
|
73 | |
|
74 | |
|
75 | |
@Override |
76 | |
@SuppressWarnings("unchecked") |
77 | |
public RecordReader<K, T> createRecordReader(InputSplit split, |
78 | |
TaskAttemptContext context) throws IOException, InterruptedException { |
79 | |
|
80 | 0 | PartitionQuery<K, T> partitionQuery = (PartitionQuery<K, T>) |
81 | 0 | ((GoraInputSplit) split).getQuery(); |
82 | |
|
83 | |
|
84 | 0 | return new GoraRecordReader<K, T>(partitionQuery, context); |
85 | |
} |
86 | |
|
87 | |
|
88 | |
|
89 | |
|
90 | |
|
91 | |
|
92 | |
@Override |
93 | |
public List<InputSplit> getSplits(JobContext context) throws IOException, |
94 | |
InterruptedException { |
95 | 0 | List<PartitionQuery<K, T>> queries = |
96 | 0 | getDataStore().getPartitions(getQuery()); |
97 | 0 | List<InputSplit> splits = new ArrayList<InputSplit>(queries.size()); |
98 | 0 | for (PartitionQuery<K, T> partQuery : queries) { |
99 | 0 | ((PartitionQueryImpl) partQuery).setConf(context.getConfiguration()); |
100 | 0 | splits.add(new GoraInputSplit(context.getConfiguration(), partQuery)); |
101 | 0 | } |
102 | 0 | return splits; |
103 | |
} |
104 | |
|
105 | |
|
106 | |
|
107 | |
|
108 | |
public DataStore<K, T> getDataStore() { |
109 | 0 | return dataStore; |
110 | |
} |
111 | |
|
112 | |
|
113 | |
|
114 | |
|
115 | |
public void setDataStore(DataStore<K, T> datStore) { |
116 | 0 | this.dataStore = datStore; |
117 | 0 | } |
118 | |
|
119 | |
|
120 | |
|
121 | |
|
122 | |
public Query<K, T> getQuery() { |
123 | 0 | return query; |
124 | |
} |
125 | |
|
126 | |
|
127 | |
|
128 | |
|
129 | |
public void setQuery(Query<K, T> query) { |
130 | 0 | this.query = query; |
131 | 0 | } |
132 | |
|
133 | |
|
134 | |
|
135 | |
|
136 | |
|
137 | |
|
138 | |
|
139 | |
|
140 | |
|
141 | |
public static <K, T extends Persistent> void setQuery(Configuration conf, |
142 | |
Query<K, T> query) throws IOException { |
143 | 0 | IOUtils.storeToConf(query, conf, QUERY_KEY); |
144 | 0 | } |
145 | |
|
146 | |
|
147 | |
|
148 | |
|
149 | |
|
150 | |
|
151 | |
|
152 | |
public Query<K, T> getQuery(Configuration conf) throws IOException { |
153 | 0 | return IOUtils.loadFromConf(conf, QUERY_KEY); |
154 | |
} |
155 | |
|
156 | |
|
157 | |
|
158 | |
|
159 | |
|
160 | |
|
161 | |
|
162 | |
|
163 | |
|
164 | |
|
165 | |
|
166 | |
public static <K, V extends Persistent> void setInput(Job job, |
167 | |
Query<K, V> query, DataStore<K, V> dataStore, boolean reuseObjects) |
168 | |
throws IOException { |
169 | |
|
170 | 0 | Configuration conf = job.getConfiguration(); |
171 | |
|
172 | 0 | GoraMapReduceUtils.setIOSerializations(conf, reuseObjects); |
173 | |
|
174 | 0 | job.setInputFormatClass(ExtraGoraInputFormat.class); |
175 | 0 | ExtraGoraInputFormat.setQuery(job.getConfiguration(), query); |
176 | 0 | } |
177 | |
} |