1 | |
|
2 | |
|
3 | |
|
4 | |
|
5 | |
|
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | |
|
13 | |
|
14 | |
|
15 | |
|
16 | |
|
17 | |
|
18 | |
|
19 | |
package org.apache.giraph.worker; |
20 | |
|
21 | |
import java.io.IOException; |
22 | |
|
23 | |
import org.apache.giraph.conf.ImmutableClassesGiraphConfiguration; |
24 | |
import org.apache.giraph.edge.Edge; |
25 | |
import org.apache.giraph.edge.OutEdges; |
26 | |
import org.apache.giraph.graph.Vertex; |
27 | |
import org.apache.giraph.graph.VertexEdgeCount; |
28 | |
import org.apache.giraph.io.GiraphInputFormat; |
29 | |
import org.apache.giraph.io.VertexInputFormat; |
30 | |
import org.apache.giraph.io.VertexReader; |
31 | |
import org.apache.giraph.io.filters.VertexInputFilter; |
32 | |
import org.apache.giraph.mapping.translate.TranslateEdge; |
33 | |
import org.apache.giraph.io.InputType; |
34 | |
import org.apache.giraph.ooc.OutOfCoreEngine; |
35 | |
import org.apache.giraph.partition.PartitionOwner; |
36 | |
import org.apache.giraph.utils.LoggerUtils; |
37 | |
import org.apache.giraph.utils.MemoryUtils; |
38 | |
import org.apache.hadoop.io.Writable; |
39 | |
import org.apache.hadoop.io.WritableComparable; |
40 | |
import org.apache.hadoop.mapreduce.InputSplit; |
41 | |
import org.apache.hadoop.mapreduce.Mapper; |
42 | |
import org.apache.log4j.Level; |
43 | |
import org.apache.log4j.Logger; |
44 | |
|
45 | |
import com.yammer.metrics.core.Counter; |
46 | |
import com.yammer.metrics.core.Meter; |
47 | |
|
48 | |
|
49 | |
|
50 | |
|
51 | |
|
52 | |
|
53 | |
|
54 | |
|
55 | |
|
56 | |
|
57 | |
@SuppressWarnings("unchecked") |
58 | |
public class VertexInputSplitsCallable<I extends WritableComparable, |
59 | |
V extends Writable, E extends Writable> |
60 | |
extends InputSplitsCallable<I, V, E> { |
61 | |
|
62 | |
public static final int VERTICES_UPDATE_PERIOD = 250000; |
63 | |
|
64 | |
public static final int VERTICES_FILTERED_UPDATE_PERIOD = 2500; |
65 | |
|
66 | |
|
67 | 0 | private static final Logger LOG = |
68 | 0 | Logger.getLogger(VertexInputSplitsCallable.class); |
69 | |
|
70 | |
private final VertexInputFormat<I, V, E> vertexInputFormat; |
71 | |
|
72 | |
private final long inputSplitMaxVertices; |
73 | |
|
74 | |
private final BspServiceWorker<I, V, E> bspServiceWorker; |
75 | |
|
76 | |
private final VertexInputFilter<I, V, E> vertexInputFilter; |
77 | |
|
78 | |
private final boolean canEmbedInIds; |
79 | |
|
80 | |
|
81 | |
|
82 | |
|
83 | |
private final boolean reuseEdgeObjects; |
84 | |
|
85 | |
private final TranslateEdge<I, E> translateEdge; |
86 | |
|
87 | |
|
88 | |
|
89 | |
private final Meter totalVerticesMeter; |
90 | |
|
91 | |
private final Counter totalVerticesFilteredCounter; |
92 | |
|
93 | |
private final Meter totalEdgesMeter; |
94 | |
|
95 | |
|
96 | |
|
97 | |
|
98 | |
|
99 | |
|
100 | |
|
101 | |
|
102 | |
|
103 | |
|
104 | |
public VertexInputSplitsCallable( |
105 | |
VertexInputFormat<I, V, E> vertexInputFormat, |
106 | |
Mapper<?, ?, ?, ?>.Context context, |
107 | |
ImmutableClassesGiraphConfiguration<I, V, E> configuration, |
108 | |
BspServiceWorker<I, V, E> bspServiceWorker, |
109 | |
WorkerInputSplitsHandler splitsHandler) { |
110 | 0 | super(context, configuration, bspServiceWorker, splitsHandler); |
111 | 0 | this.vertexInputFormat = vertexInputFormat; |
112 | |
|
113 | 0 | inputSplitMaxVertices = configuration.getInputSplitMaxVertices(); |
114 | 0 | this.bspServiceWorker = bspServiceWorker; |
115 | 0 | vertexInputFilter = configuration.getVertexInputFilter(); |
116 | 0 | reuseEdgeObjects = configuration.reuseEdgeObjects(); |
117 | 0 | canEmbedInIds = bspServiceWorker |
118 | 0 | .getLocalData() |
119 | 0 | .getMappingStoreOps() != null && |
120 | |
bspServiceWorker |
121 | 0 | .getLocalData() |
122 | 0 | .getMappingStoreOps() |
123 | 0 | .hasEmbedding(); |
124 | 0 | translateEdge = bspServiceWorker.getTranslateEdge(); |
125 | |
|
126 | |
|
127 | 0 | totalVerticesMeter = getTotalVerticesLoadedMeter(); |
128 | 0 | totalVerticesFilteredCounter = getTotalVerticesFilteredCounter(); |
129 | 0 | totalEdgesMeter = getTotalEdgesLoadedMeter(); |
130 | 0 | } |
131 | |
|
132 | |
@Override |
133 | |
public GiraphInputFormat getInputFormat() { |
134 | 0 | return vertexInputFormat; |
135 | |
} |
136 | |
|
137 | |
@Override |
138 | |
public InputType getInputType() { |
139 | 0 | return InputType.VERTEX; |
140 | |
} |
141 | |
|
142 | |
|
143 | |
|
144 | |
|
145 | |
|
146 | |
|
147 | |
|
148 | |
|
149 | |
|
150 | |
|
151 | |
@Override |
152 | |
protected VertexEdgeCount readInputSplit( |
153 | |
InputSplit inputSplit) throws IOException, InterruptedException { |
154 | 0 | VertexReader<I, V, E> vertexReader = |
155 | 0 | vertexInputFormat.createVertexReader(inputSplit, context); |
156 | 0 | vertexReader.setConf(configuration); |
157 | |
|
158 | 0 | WorkerThreadGlobalCommUsage globalCommUsage = |
159 | |
this.bspServiceWorker |
160 | 0 | .getAggregatorHandler().newThreadAggregatorUsage(); |
161 | |
|
162 | 0 | vertexReader.initialize(inputSplit, context); |
163 | |
|
164 | 0 | vertexReader.setWorkerGlobalCommUsage(globalCommUsage); |
165 | |
|
166 | 0 | long inputSplitVerticesLoaded = 0; |
167 | 0 | long inputSplitVerticesFiltered = 0; |
168 | |
|
169 | 0 | long edgesSinceLastUpdate = 0; |
170 | 0 | long inputSplitEdgesLoaded = 0; |
171 | |
|
172 | 0 | int count = 0; |
173 | 0 | OutOfCoreEngine oocEngine = bspServiceWorker.getServerData().getOocEngine(); |
174 | 0 | while (vertexReader.nextVertex()) { |
175 | |
|
176 | |
|
177 | |
|
178 | 0 | if (oocEngine != null && |
179 | |
(++count & OutOfCoreEngine.CHECK_IN_INTERVAL) == 0) { |
180 | 0 | oocEngine.activeThreadCheckIn(); |
181 | |
} |
182 | 0 | Vertex<I, V, E> readerVertex = vertexReader.getCurrentVertex(); |
183 | 0 | if (readerVertex.getId() == null) { |
184 | 0 | throw new IllegalArgumentException( |
185 | |
"readInputSplit: Vertex reader returned a vertex " + |
186 | |
"without an id! - " + readerVertex); |
187 | |
} |
188 | 0 | if (canEmbedInIds) { |
189 | 0 | bspServiceWorker |
190 | 0 | .getLocalData() |
191 | 0 | .getMappingStoreOps() |
192 | 0 | .embedTargetInfo(readerVertex.getId()); |
193 | |
} |
194 | 0 | if (readerVertex.getValue() == null) { |
195 | 0 | readerVertex.setValue(configuration.createVertexValue()); |
196 | |
} |
197 | 0 | readerVertex.setConf(configuration); |
198 | |
|
199 | 0 | ++inputSplitVerticesLoaded; |
200 | |
|
201 | 0 | if (vertexInputFilter.dropVertex(readerVertex)) { |
202 | 0 | ++inputSplitVerticesFiltered; |
203 | 0 | if (inputSplitVerticesFiltered % VERTICES_FILTERED_UPDATE_PERIOD == 0) { |
204 | 0 | totalVerticesFilteredCounter.inc(inputSplitVerticesFiltered); |
205 | 0 | inputSplitVerticesFiltered = 0; |
206 | |
} |
207 | |
continue; |
208 | |
} |
209 | |
|
210 | |
|
211 | 0 | if (translateEdge != null) { |
212 | |
|
213 | 0 | if (readerVertex.getEdges() != null && readerVertex.getNumEdges() > 0) { |
214 | 0 | OutEdges<I, E> vertexOutEdges = configuration |
215 | 0 | .createAndInitializeOutEdges(readerVertex.getNumEdges()); |
216 | |
|
217 | |
|
218 | |
|
219 | |
|
220 | |
|
221 | |
|
222 | |
|
223 | |
|
224 | |
|
225 | 0 | for (Edge<I, E> edge : readerVertex.getEdges()) { |
226 | 0 | if (reuseEdgeObjects) { |
227 | 0 | bspServiceWorker |
228 | 0 | .getLocalData() |
229 | 0 | .getMappingStoreOps() |
230 | 0 | .embedTargetInfo(edge.getTargetVertexId()); |
231 | 0 | vertexOutEdges.add(edge); |
232 | |
} else { |
233 | 0 | vertexOutEdges.add(configuration.createEdge(translateEdge, edge)); |
234 | |
} |
235 | 0 | } |
236 | |
|
237 | 0 | readerVertex.setEdges(vertexOutEdges); |
238 | |
} |
239 | |
} |
240 | |
|
241 | 0 | PartitionOwner partitionOwner = |
242 | 0 | bspServiceWorker.getVertexPartitionOwner(readerVertex.getId()); |
243 | 0 | workerClientRequestProcessor.sendVertexRequest( |
244 | |
partitionOwner, readerVertex); |
245 | 0 | edgesSinceLastUpdate += readerVertex.getNumEdges(); |
246 | |
|
247 | |
|
248 | 0 | if (inputSplitVerticesLoaded % VERTICES_UPDATE_PERIOD == 0) { |
249 | 0 | totalVerticesMeter.mark(VERTICES_UPDATE_PERIOD); |
250 | 0 | WorkerProgress.get().addVerticesLoaded(VERTICES_UPDATE_PERIOD); |
251 | 0 | totalEdgesMeter.mark(edgesSinceLastUpdate); |
252 | 0 | inputSplitEdgesLoaded += edgesSinceLastUpdate; |
253 | 0 | edgesSinceLastUpdate = 0; |
254 | |
|
255 | 0 | LoggerUtils.setStatusAndLog( |
256 | |
context, LOG, Level.INFO, |
257 | |
"readVertexInputSplit: Loaded " + |
258 | 0 | totalVerticesMeter.count() + " vertices at " + |
259 | 0 | totalVerticesMeter.meanRate() + " vertices/sec " + |
260 | 0 | totalEdgesMeter.count() + " edges at " + |
261 | 0 | totalEdgesMeter.meanRate() + " edges/sec " + |
262 | 0 | MemoryUtils.getRuntimeMemoryStats()); |
263 | |
} |
264 | |
|
265 | |
|
266 | |
|
267 | 0 | if (inputSplitMaxVertices > 0 && |
268 | |
inputSplitVerticesLoaded >= inputSplitMaxVertices) { |
269 | 0 | if (LOG.isInfoEnabled()) { |
270 | 0 | LOG.info("readInputSplit: Leaving the input " + |
271 | |
"split early, reached maximum vertices " + |
272 | |
inputSplitVerticesLoaded); |
273 | |
} |
274 | |
break; |
275 | |
} |
276 | 0 | } |
277 | |
|
278 | 0 | totalVerticesMeter.mark(inputSplitVerticesLoaded % VERTICES_UPDATE_PERIOD); |
279 | 0 | totalEdgesMeter.mark(edgesSinceLastUpdate); |
280 | 0 | totalVerticesFilteredCounter.inc(inputSplitVerticesFiltered); |
281 | |
|
282 | 0 | vertexReader.close(); |
283 | |
|
284 | 0 | WorkerProgress.get().addVerticesLoaded( |
285 | |
inputSplitVerticesLoaded % VERTICES_UPDATE_PERIOD); |
286 | 0 | WorkerProgress.get().incrementVertexInputSplitsLoaded(); |
287 | |
|
288 | 0 | return new VertexEdgeCount(inputSplitVerticesLoaded, |
289 | |
inputSplitEdgesLoaded + edgesSinceLastUpdate, 0); |
290 | |
} |
291 | |
} |
292 | |
|