1 | |
|
2 | |
|
3 | |
|
4 | |
|
5 | |
|
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | |
|
13 | |
|
14 | |
|
15 | |
|
16 | |
|
17 | |
|
18 | |
package org.apache.giraph.lib; |
19 | |
|
20 | |
import com.google.common.collect.Maps; |
21 | |
import org.apache.giraph.graph.BasicVertex; |
22 | |
import org.apache.giraph.graph.BspUtils; |
23 | |
import org.apache.giraph.graph.Edge; |
24 | |
import org.apache.giraph.lib.TextVertexInputFormat.TextVertexReader; |
25 | |
import org.apache.hadoop.conf.Configuration; |
26 | |
import org.apache.hadoop.io.LongWritable; |
27 | |
import org.apache.hadoop.io.Text; |
28 | |
import org.apache.hadoop.io.Writable; |
29 | |
import org.apache.hadoop.io.WritableComparable; |
30 | |
import org.apache.hadoop.mapreduce.RecordReader; |
31 | |
|
32 | |
import java.io.IOException; |
33 | |
import java.util.Map; |
34 | |
|
35 | |
|
36 | |
|
37 | |
|
38 | |
|
39 | |
|
40 | |
|
41 | |
|
42 | |
|
43 | |
|
44 | |
|
45 | |
|
46 | |
|
47 | |
|
48 | |
@SuppressWarnings("rawtypes") |
49 | |
public abstract class AdjacencyListVertexReader<I extends WritableComparable, |
50 | |
V extends Writable, E extends Writable, M extends Writable> extends |
51 | |
TextVertexInputFormat.TextVertexReader<I, V, E, M> { |
52 | |
|
53 | |
public static final String LINE_TOKENIZE_VALUE = "adj.list.input.delimiter"; |
54 | |
|
55 | |
public static final String LINE_TOKENIZE_VALUE_DEFAULT = "\t"; |
56 | |
|
57 | 0 | private String splitValue = null; |
58 | |
|
59 | |
|
60 | |
|
61 | |
|
62 | |
public interface LineSanitizer { |
63 | |
|
64 | |
|
65 | |
|
66 | |
|
67 | |
|
68 | |
|
69 | |
String sanitize(String s); |
70 | |
} |
71 | |
|
72 | |
|
73 | |
|
74 | |
|
75 | |
private final LineSanitizer sanitizer; |
76 | |
|
77 | |
|
78 | |
|
79 | |
|
80 | |
|
81 | |
|
82 | |
public AdjacencyListVertexReader( |
83 | |
RecordReader<LongWritable, Text> lineRecordReader) { |
84 | 0 | super(lineRecordReader); |
85 | 0 | sanitizer = null; |
86 | 0 | } |
87 | |
|
88 | |
|
89 | |
|
90 | |
|
91 | |
|
92 | |
|
93 | |
|
94 | |
public AdjacencyListVertexReader( |
95 | |
RecordReader<LongWritable, Text> lineRecordReader, |
96 | |
LineSanitizer sanitizer) { |
97 | 0 | super(lineRecordReader); |
98 | 0 | this.sanitizer = sanitizer; |
99 | 0 | } |
100 | |
|
101 | |
|
102 | |
|
103 | |
|
104 | |
|
105 | |
|
106 | |
|
107 | |
public abstract void decodeId(String s, I id); |
108 | |
|
109 | |
|
110 | |
|
111 | |
|
112 | |
|
113 | |
|
114 | |
public abstract void decodeValue(String s, V value); |
115 | |
|
116 | |
|
117 | |
|
118 | |
|
119 | |
|
120 | |
|
121 | |
|
122 | |
public abstract void decodeEdge(String id, String value, Edge<I, E> edge); |
123 | |
|
124 | |
|
125 | |
@Override |
126 | |
public boolean nextVertex() throws IOException, InterruptedException { |
127 | 0 | return getRecordReader().nextKeyValue(); |
128 | |
} |
129 | |
|
130 | |
@Override |
131 | |
public BasicVertex<I, V, E, M> getCurrentVertex() |
132 | |
throws IOException, InterruptedException { |
133 | 0 | Configuration conf = getContext().getConfiguration(); |
134 | 0 | String line = getRecordReader().getCurrentValue().toString(); |
135 | 0 | BasicVertex<I, V, E, M> vertex = BspUtils.createVertex(conf); |
136 | |
|
137 | 0 | if (sanitizer != null) { |
138 | 0 | line = sanitizer.sanitize(line); |
139 | |
} |
140 | |
|
141 | 0 | if (splitValue == null) { |
142 | 0 | splitValue = conf.get(LINE_TOKENIZE_VALUE, LINE_TOKENIZE_VALUE_DEFAULT); |
143 | |
} |
144 | |
|
145 | 0 | String [] values = line.split(splitValue); |
146 | |
|
147 | 0 | if ((values.length < 2) || (values.length % 2 != 0)) { |
148 | 0 | throw new IllegalArgumentException( |
149 | |
"Line did not split correctly: " + line); |
150 | |
} |
151 | |
|
152 | 0 | I vertexId = BspUtils.<I>createVertexIndex(conf); |
153 | 0 | decodeId(values[0], vertexId); |
154 | |
|
155 | 0 | V value = BspUtils.<V>createVertexValue(conf); |
156 | 0 | decodeValue(values[1], value); |
157 | |
|
158 | 0 | int i = 2; |
159 | 0 | Map<I, E> edges = Maps.newHashMap(); |
160 | 0 | Edge<I, E> edge = new Edge<I, E>(); |
161 | 0 | while (i < values.length) { |
162 | 0 | decodeEdge(values[i], values[i + 1], edge); |
163 | 0 | edges.put(edge.getDestVertexId(), edge.getEdgeValue()); |
164 | 0 | i += 2; |
165 | |
} |
166 | 0 | vertex.initialize(vertexId, value, edges, null); |
167 | 0 | return vertex; |
168 | |
} |
169 | |
} |