1 | |
|
2 | |
|
3 | |
|
4 | |
|
5 | |
|
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | |
|
13 | |
|
14 | |
|
15 | |
|
16 | |
|
17 | |
|
18 | |
package org.apache.any23.extractor; |
19 | |
|
20 | |
import org.apache.any23.configuration.DefaultConfiguration; |
21 | |
import org.apache.any23.extractor.csv.CSVExtractor; |
22 | |
import org.apache.any23.extractor.html.AdrExtractor; |
23 | |
import org.apache.any23.extractor.html.GeoExtractor; |
24 | |
import org.apache.any23.extractor.html.HCalendarExtractor; |
25 | |
import org.apache.any23.extractor.html.HCardExtractor; |
26 | |
import org.apache.any23.extractor.html.HListingExtractor; |
27 | |
import org.apache.any23.extractor.html.HRecipeExtractor; |
28 | |
import org.apache.any23.extractor.html.HResumeExtractor; |
29 | |
import org.apache.any23.extractor.html.HReviewExtractor; |
30 | |
import org.apache.any23.extractor.html.HTMLMetaExtractor; |
31 | |
import org.apache.any23.extractor.html.HeadLinkExtractor; |
32 | |
import org.apache.any23.extractor.html.ICBMExtractor; |
33 | |
import org.apache.any23.extractor.html.LicenseExtractor; |
34 | |
import org.apache.any23.extractor.html.SpeciesExtractor; |
35 | |
import org.apache.any23.extractor.html.TitleExtractor; |
36 | |
import org.apache.any23.extractor.html.TurtleHTMLExtractor; |
37 | |
import org.apache.any23.extractor.html.XFNExtractor; |
38 | |
import org.apache.any23.extractor.microdata.MicrodataExtractor; |
39 | |
import org.apache.any23.extractor.rdf.NQuadsExtractor; |
40 | |
import org.apache.any23.extractor.rdf.NTriplesExtractor; |
41 | |
import org.apache.any23.extractor.rdf.RDFXMLExtractor; |
42 | |
import org.apache.any23.extractor.rdf.TriXExtractor; |
43 | |
import org.apache.any23.extractor.rdf.TurtleExtractor; |
44 | |
import org.apache.any23.extractor.rdfa.RDFa11Extractor; |
45 | |
import org.apache.any23.extractor.rdfa.RDFaExtractor; |
46 | |
|
47 | |
import java.util.ArrayList; |
48 | |
import java.util.Collections; |
49 | |
import java.util.HashMap; |
50 | |
import java.util.List; |
51 | |
import java.util.Map; |
52 | |
|
53 | |
|
54 | |
|
55 | |
|
56 | |
|
57 | 0 | public class ExtractorRegistry { |
58 | |
|
59 | |
|
60 | |
|
61 | |
|
62 | 0 | private static ExtractorRegistry instance = null; |
63 | |
|
64 | |
|
65 | |
|
66 | |
|
67 | |
|
68 | 0 | private Map<String, ExtractorFactory<?>> factories = new HashMap<String, ExtractorFactory<?>>(); |
69 | |
|
70 | |
|
71 | |
|
72 | |
|
73 | |
public static ExtractorRegistry getInstance() { |
74 | |
|
75 | 0 | synchronized (ExtractorRegistry.class) { |
76 | 0 | final DefaultConfiguration conf = DefaultConfiguration.singleton(); |
77 | 0 | if (instance == null) { |
78 | 0 | instance = new ExtractorRegistry(); |
79 | 0 | instance.register(RDFXMLExtractor.factory); |
80 | 0 | instance.register(TurtleExtractor.factory); |
81 | 0 | instance.register(NTriplesExtractor.factory); |
82 | 0 | instance.register(NQuadsExtractor.factory); |
83 | 0 | instance.register(TriXExtractor.factory); |
84 | 0 | if(conf.getFlagProperty("any23.extraction.rdfa.programmatic")) { |
85 | 0 | instance.register(RDFa11Extractor.factory); |
86 | |
} else { |
87 | 0 | instance.register(RDFaExtractor.factory); |
88 | |
} |
89 | 0 | instance.register(HeadLinkExtractor.factory); |
90 | 0 | instance.register(LicenseExtractor.factory); |
91 | 0 | instance.register(TitleExtractor.factory); |
92 | 0 | instance.register(XFNExtractor.factory); |
93 | 0 | instance.register(ICBMExtractor.factory); |
94 | 0 | instance.register(AdrExtractor.factory); |
95 | 0 | instance.register(GeoExtractor.factory); |
96 | 0 | instance.register(HCalendarExtractor.factory); |
97 | 0 | instance.register(HCardExtractor.factory); |
98 | 0 | instance.register(HListingExtractor.factory); |
99 | 0 | instance.register(HResumeExtractor.factory); |
100 | 0 | instance.register(HReviewExtractor.factory); |
101 | 0 | instance.register(HRecipeExtractor.factory); |
102 | 0 | instance.register(SpeciesExtractor.factory); |
103 | 0 | instance.register(TurtleHTMLExtractor.factory); |
104 | 0 | instance.register(MicrodataExtractor.factory); |
105 | 0 | instance.register(CSVExtractor.factory); |
106 | 0 | if(conf.getFlagProperty("any23.extraction.head.meta")) { |
107 | 0 | instance.register(HTMLMetaExtractor.factory); |
108 | |
} |
109 | |
} |
110 | 0 | } |
111 | 0 | return instance; |
112 | |
} |
113 | |
|
114 | |
|
115 | |
|
116 | |
|
117 | |
|
118 | |
|
119 | |
|
120 | |
|
121 | |
public void register(ExtractorFactory<?> factory) { |
122 | 0 | if (factories.containsKey(factory.getExtractorName())) { |
123 | 0 | throw new IllegalArgumentException(String.format("Extractor name clash: %s", |
124 | |
factory.getExtractorName())); |
125 | |
} |
126 | 0 | factories.put(factory.getExtractorName(), factory); |
127 | 0 | } |
128 | |
|
129 | |
|
130 | |
|
131 | |
|
132 | |
|
133 | |
|
134 | |
|
135 | |
|
136 | |
|
137 | |
|
138 | |
public ExtractorFactory<?> getFactory(String name) { |
139 | 0 | if (!factories.containsKey(name)) { |
140 | 0 | throw new IllegalArgumentException("Unregistered extractor name: " + name); |
141 | |
} |
142 | 0 | return factories.get(name); |
143 | |
} |
144 | |
|
145 | |
|
146 | |
|
147 | |
|
148 | |
|
149 | |
public ExtractorGroup getExtractorGroup() { |
150 | 0 | return getExtractorGroup(getAllNames()); |
151 | |
} |
152 | |
|
153 | |
|
154 | |
|
155 | |
|
156 | |
|
157 | |
|
158 | |
|
159 | |
public ExtractorGroup getExtractorGroup(List<String> names) { |
160 | 0 | List<ExtractorFactory<?>> members = new ArrayList<ExtractorFactory<?>>(names.size()); |
161 | 0 | for (String name : names) { |
162 | 0 | members.add(getFactory(name)); |
163 | |
} |
164 | 0 | return new ExtractorGroup(members); |
165 | |
} |
166 | |
|
167 | |
|
168 | |
|
169 | |
|
170 | |
|
171 | |
|
172 | |
|
173 | |
public boolean isRegisteredName(String name) { |
174 | 0 | return factories.containsKey(name); |
175 | |
} |
176 | |
|
177 | |
|
178 | |
|
179 | |
|
180 | |
public List<String> getAllNames() { |
181 | 0 | List<String> result = new ArrayList<String>(factories.keySet()); |
182 | 0 | Collections.sort(result); |
183 | 0 | return result; |
184 | |
} |
185 | |
|
186 | |
} |