Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
TagSoupExtractionResult |
|
| 1.9375;1.938 | ||||
TagSoupExtractionResult$PropertyPath |
|
| 1.9375;1.938 | ||||
TagSoupExtractionResult$ResourceRoot |
|
| 1.9375;1.938 |
1 | /* | |
2 | * Licensed to the Apache Software Foundation (ASF) under one or more | |
3 | * contributor license agreements. See the NOTICE file distributed with | |
4 | * this work for additional information regarding copyright ownership. | |
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 | |
6 | * (the "License"); you may not use this file except in compliance with | |
7 | * the License. You may obtain a copy of the License at | |
8 | * | |
9 | * http://www.apache.org/licenses/LICENSE-2.0 | |
10 | * | |
11 | * Unless required by applicable law or agreed to in writing, software | |
12 | * distributed under the License is distributed on an "AS IS" BASIS, | |
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 | * See the License for the specific language governing permissions and | |
15 | * limitations under the License. | |
16 | */ | |
17 | ||
18 | package org.apache.any23.extractor; | |
19 | ||
20 | import org.apache.any23.extractor.html.MicroformatExtractor; | |
21 | import org.openrdf.model.BNode; | |
22 | import org.openrdf.model.Resource; | |
23 | ||
24 | import java.util.Arrays; | |
25 | import java.util.List; | |
26 | ||
27 | /** | |
28 | * This interface models a specific {@link ExtractionResult} | |
29 | * able to collect property roots generated by <i>HTML Microformat</i> extractions. | |
30 | * | |
31 | * @author Michele Mostarda (mostarda@fbk.eu) | |
32 | */ | |
33 | public interface TagSoupExtractionResult extends ExtractionResult { | |
34 | ||
35 | /** | |
36 | * Adds a resource root to the extraction result, specifying also | |
37 | * the <i>path</i> corresponding to the root of the data which generated it | |
38 | * and the extractor responsible for such addition. | |
39 | * | |
40 | * @param path the <i>path</i> from the document root to the local root of the data generating the property. | |
41 | * @param root the resource representing the property root node. | |
42 | * @param extractor the class of the extractor responsible for such extraction. | |
43 | */ | |
44 | void addResourceRoot(String[] path, Resource root, Class<? extends MicroformatExtractor> extractor); | |
45 | ||
46 | /** | |
47 | * Returns all the resource roots collected by this extraction result. | |
48 | * | |
49 | * @return an <b>unmodifiable</b> list of the collected | |
50 | * {@link TagSoupExtractionResult.ResourceRoot}s. | |
51 | */ | |
52 | List<ResourceRoot> getResourceRoots(); | |
53 | ||
54 | /** | |
55 | * Adds a property path to the list of the extracted data. | |
56 | * | |
57 | * @param extractor the class of the extractor responsible for retrieving such property. | |
58 | * @param propertySubject the subject of the property. | |
59 | * @param property the property URI, passed as a generic {@link Resource}. | |
60 | * @param object the property object if any, <code>null</code> otherwise. | |
61 | * @param path the path of the <i>HTML</i> node from which the property literal has been extracted. | |
62 | */ | |
63 | void addPropertyPath( | |
64 | Class<? extends MicroformatExtractor> extractor, | |
65 | Resource propertySubject, | |
66 | Resource property, | |
67 | BNode object, | |
68 | String[] path | |
69 | ); | |
70 | ||
71 | /** | |
72 | * Returns all the property paths collected by this extraction result. | |
73 | * | |
74 | * @return the list of collected {@link PropertyPath}s; presumably never <code>null</code> (TODO confirm against implementations). | |
75 | */ | |
76 | List<PropertyPath> getPropertyPaths(); | |
77 | ||
78 | /** | |
79 | * Defines a property root object. | |
80 | */ | |
81 | class ResourceRoot { | |
82 | private String[] path; | |
83 | private Resource root; | |
84 | private Class<? extends MicroformatExtractor> extractor; | |
85 | ||
86 | 0 | public ResourceRoot(String[] path, Resource root, Class<? extends MicroformatExtractor> extractor) { |
87 | 0 | if(path == null || path.length == 0) { |
88 | 0 | throw new IllegalArgumentException( String.format("Invalid xpath: '%s'.", Arrays.toString(path) ) ); |
89 | } | |
90 | 0 | if(root == null) { |
91 | 0 | throw new IllegalArgumentException("Invalid root, cannot be null."); |
92 | } | |
93 | 0 | if(extractor == null) { |
94 | 0 | throw new IllegalArgumentException("Invalid extractor, cannot ne null"); |
95 | } | |
96 | 0 | this.path = path; |
97 | 0 | this.root = root; |
98 | 0 | this.extractor = extractor; |
99 | 0 | } |
100 | ||
101 | public String[] getPath() { | |
102 | 0 | return path; |
103 | } | |
104 | ||
105 | public Resource getRoot() { | |
106 | 0 | return root; |
107 | } | |
108 | ||
109 | public Class<? extends MicroformatExtractor> getExtractor() { | |
110 | 0 | return extractor; |
111 | } | |
112 | ||
113 | @Override | |
114 | public String toString() { | |
115 | 0 | return String.format( |
116 | "%s-%s-%s %s", | |
117 | this.getClass().getCanonicalName(), | |
118 | Arrays.toString(path), | |
119 | root, | |
120 | extractor | |
121 | ); | |
122 | } | |
123 | } | |
124 | ||
125 | /** | |
126 | * Defines a property path object. | |
127 | */ | |
128 | class PropertyPath { | |
129 | ||
130 | private Class<? extends MicroformatExtractor> extractor; | |
131 | private String[] path; | |
132 | private Resource subject; | |
133 | private Resource property; | |
134 | private BNode object; | |
135 | ||
136 | 0 | public PropertyPath(String[] path, Resource subject, Resource property, BNode object, Class<? extends MicroformatExtractor> extractor) { |
137 | 0 | if(path == null) { |
138 | 0 | throw new NullPointerException("path cannot be null."); |
139 | } | |
140 | 0 | if(subject == null) { |
141 | 0 | throw new NullPointerException("subject cannot be null."); |
142 | } | |
143 | 0 | if(property == null) { |
144 | 0 | throw new NullPointerException("property cannot be null."); |
145 | } | |
146 | 0 | if(extractor == null) { |
147 | 0 | throw new NullPointerException("extractor cannot be null."); |
148 | } | |
149 | 0 | this.path = path; |
150 | 0 | this.subject = subject; |
151 | 0 | this.property = property; |
152 | 0 | this.object = object; |
153 | 0 | this.extractor = extractor; |
154 | 0 | } |
155 | ||
156 | public String[] getPath() { | |
157 | 0 | return path; |
158 | } | |
159 | ||
160 | public Resource getSubject() { | |
161 | 0 | return subject; |
162 | } | |
163 | ||
164 | public Resource getProperty() { | |
165 | 0 | return property; |
166 | } | |
167 | ||
168 | public BNode getObject() { | |
169 | 0 | return object; |
170 | } | |
171 | ||
172 | public Class<? extends MicroformatExtractor> getExtractor() { | |
173 | 0 | return extractor; |
174 | } | |
175 | ||
176 | @Override | |
177 | public String toString() { | |
178 | 0 | return String.format( |
179 | "%s %s - %s - %s -- %s -->", | |
180 | this.getClass().getCanonicalName(), | |
181 | Arrays.toString(path), | |
182 | extractor, | |
183 | subject, | |
184 | property | |
185 | ); | |
186 | } | |
187 | } | |
188 | ||
189 | } |