View Javadoc
1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements. See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership. The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.commons.rdf.api;
19  
20  import static org.junit.Assert.*;
21  
22  import java.util.ArrayList;
23  import java.util.HashSet;
24  import java.util.Iterator;
25  import java.util.List;
26  import java.util.Map;
27  import java.util.Optional;
28  import java.util.Set;
29  import java.util.concurrent.ConcurrentHashMap;
30  import java.util.stream.Collectors;
31  import java.util.stream.Stream;
32  
33  import org.junit.Assume;
34  import org.junit.Before;
35  import org.junit.Test;
36  
37  /**
38   * Test Dataset implementation
39   * <p>
40   * To add to your implementation's tests, create a subclass with a name ending
41   * in <code>Test</code> and provide {@link #createFactory()} which minimally
42   * must support {@link RDF#createDataset()} and {@link RDF#createIRI(String)}, but
43   * ideally support all operations.
44   * <p>
45   * This test uses try-with-resources blocks for calls to {@link Dataset#stream()}
46   * and {@link Dataset#iterate()}.
47   * 
48   * @see Dataset
49   * @see RDF
50   */
51  public abstract class AbstractDatasetTest {
52  
53      protected RDF factory;
54      protected Dataset dataset;
55      protected IRI alice;
56      protected IRI bob;
57      protected IRI name;
58      protected IRI knows;
59      protected IRI member;
60      protected BlankNode bnode1;
61      protected BlankNode bnode2;
62      protected Literal aliceName;
63      protected Literal bobName;
64      protected Literal secretClubName;
65      protected Literal companyName;
66      protected Quad bobNameQuad;
67      private IRI isPrimaryTopicOf;
68      private IRI graph1;
69      private BlankNode graph2;
70  
    /**
     * Provides the {@link RDF} factory under test.
     * <p>
     * This method must be overridden by the implementing test to provide a
     * factory for the test to create {@link Dataset}, {@link IRI} etc. It is
     * invoked before every test when the shared fixture is built, and again
     * by the helpers that build independent datasets.
     *
     * @return {@link RDF} instance to be tested.
     */
    protected abstract RDF createFactory();
79  
80      @Before
81      public void createDatasetAndAdd() {
82          factory = createFactory();
83          dataset = factory.createDataset();
84          assertEquals(0, dataset.size());
85  
86          graph1 = factory.createIRI("http://example.com/graph1");
87          graph2 = factory.createBlankNode();
88          
89          alice = factory.createIRI("http://example.com/alice");
90          bob = factory.createIRI("http://example.com/bob");
91          name = factory.createIRI("http://xmlns.com/foaf/0.1/name");
92          knows = factory.createIRI("http://xmlns.com/foaf/0.1/knows");
93          member = factory.createIRI("http://xmlns.com/foaf/0.1/member");
94          bnode1 = factory.createBlankNode("org1");
95          bnode2 = factory.createBlankNode("org2");
96  
97          secretClubName = factory.createLiteral("The Secret Club");
98          companyName = factory.createLiteral("A company");
99          aliceName = factory.createLiteral("Alice");
100         bobName = factory.createLiteral("Bob", "en-US");
101 
102         dataset.add(graph1, alice, name, aliceName);
103         dataset.add(graph1, alice, knows, bob);
104 
105         dataset.add(graph1, alice, member, bnode1);
106 
107         bobNameQuad = factory.createQuad(graph2, bob, name, bobName);
108         dataset.add(bobNameQuad);
109 
110         dataset.add(factory.createQuad(graph2, bob, member, bnode1));
111         dataset.add(factory.createQuad(graph2, bob, member, bnode2));
112         // NOTE: bnode1 used in both graph1 and graph2
113         dataset.add(graph1, bnode1, name, secretClubName);
114         dataset.add(graph2, bnode2, name, companyName);
115         
116         // default graph describes graph1 and graph2        
117         isPrimaryTopicOf = factory.createIRI("http://xmlns.com/foaf/0.1/isPrimaryTopicOf");
118         dataset.add(null, alice, isPrimaryTopicOf, graph1);
119         dataset.add(null, bob, isPrimaryTopicOf, graph2);
120         
121         
122     }
123 
124     @Test
125     public void size() throws Exception {
126         assertEquals(10, dataset.size());
127     }
128 
129     @Test
130     public void iterate() throws Exception {
131         Assume.assumeTrue(dataset.size() > 0);
132         final List<Quad> quads = new ArrayList<>();
133         for (final Quad t : dataset.iterate()) {
134             quads.add(t);
135         }
136         assertEquals(dataset.size(), quads.size());
137         
138         //assertTrue(quads.contains(bobNameQuad));
139         // java.util.List won't do any BlankNode mapping, so 
140         // instead bobNameQuad of let's check for an IRI-centric quad 
141         final Quad q = factory.createQuad(graph1, alice, name, aliceName);
142         quads.contains(q);
143 
144         // aborted iteration
145         final Iterable<Quad> iterate = dataset.iterate();
146         final Iterator<Quad> it = iterate.iterator();
147 
148         assertTrue(it.hasNext());
149         it.next();
150         closeIterable(iterate);
151 
152         // second iteration - should start from fresh and
153         // get the same count
154         long count = 0;
155         final Iterable<Quad> iterable = dataset.iterate();
156         for (@SuppressWarnings("unused") final
157         Quad t : iterable) {
158             count++;
159         }
160         assertEquals(dataset.size(), count);
161     }
162 
163     /**
164      * Special quad closing for RDF4J.
165      */
166     private void closeIterable(final Iterable<Quad> iterate) throws Exception {
167         if (iterate instanceof AutoCloseable) {
168             ((AutoCloseable) iterate).close();
169         }
170     }
171 
172     @Test
173     public void iterateFilter() throws Exception {
174         final List<RDFTerm> friends = new ArrayList<>();
175         final IRI alice = factory.createIRI("http://example.com/alice");
176         final IRI knows = factory.createIRI("http://xmlns.com/foaf/0.1/knows");
177         for (final Quad t : dataset.iterate(null, alice, knows, null)) {
178             friends.add(t.getObject());
179         }
180         assertEquals(1, friends.size());
181         assertEquals(bob, friends.get(0));
182 
183         // .. can we iterate over zero hits?
184         final Iterable<Quad> iterate = dataset.iterate(Optional.of(graph2), bob, knows, alice);
185         for (final Quad unexpected : iterate) {
186             fail("Unexpected quad " + unexpected);
187         }
188         // closeIterable(iterate);
189     }
190 
191     @Test
192     public void contains() throws Exception {
193         assertFalse(dataset.contains(null, bob, knows, alice)); // or so he claims..
194 
195         assertTrue(dataset.contains(Optional.of(graph1), alice, knows, bob));
196 
197         try (Stream<? extends Quad> stream = dataset.stream()) {
198             final Optional<? extends Quad> first = stream.skip(4).findFirst();
199             Assume.assumeTrue(first.isPresent());
200             final Quad existingQuad = first.get();
201             assertTrue(dataset.contains(existingQuad));
202         }
203 
204         final Quad nonExistingQuad = factory.createQuad(graph2, bob, knows, alice);
205         assertFalse(dataset.contains(nonExistingQuad));
206 
207         // An existing quad
208         final Quad quad = factory.createQuad(graph1, alice, knows, bob);
209         // FIXME: Should not this always be true?
210          assertTrue(dataset.contains(quad));
211     }
212 
213     @Test
214     public void remove() throws Exception {
215         final long fullSize = dataset.size();
216         dataset.remove(Optional.of(graph1), alice, knows, bob);
217         final long shrunkSize = dataset.size();
218         assertEquals(1, fullSize - shrunkSize);
219 
220         dataset.remove(Optional.of(graph1), alice, knows, bob);
221         assertEquals(shrunkSize, dataset.size()); // unchanged
222 
223         dataset.add(graph1, alice, knows, bob);
224         dataset.add(graph2, alice, knows, bob);
225         dataset.add(graph2, alice, knows, bob);
226         // Undetermined size at this point -- but at least it
227         // should be bigger
228         assertTrue(dataset.size() > shrunkSize);
229 
230         // and after a single remove they should all be gone
231         dataset.remove(null, alice, knows, bob);
232         assertEquals(shrunkSize, dataset.size());
233 
234         Quad otherQuad;
235         try (Stream<? extends Quad> stream = dataset.stream()) {
236             final Optional<? extends Quad> anyQuad = stream.findAny();
237             Assume.assumeTrue(anyQuad.isPresent());
238             otherQuad = anyQuad.get();
239         }
240 
241         dataset.remove(otherQuad);
242         assertEquals(shrunkSize - 1, dataset.size());
243         dataset.remove(otherQuad);
244         assertEquals(shrunkSize - 1, dataset.size()); // no change
245 
246         // for some reason in rdf4j this causes duplicates!
247         dataset.add(otherQuad);
248         // dataset.stream().forEach(System.out::println);
249         // should have increased
250         assertTrue(dataset.size() >= shrunkSize);
251     }
252 
253     @Test
254     public void clear() throws Exception {
255         dataset.clear();
256         assertFalse(dataset.contains(null, alice, knows, bob));
257         assertEquals(0, dataset.size());
258         dataset.clear(); // no-op
259         assertEquals(0, dataset.size());
260         assertFalse(dataset.contains(null, null, null, null)); // nothing here
261     }
262 
263     @Test
264     public void getQuads() throws Exception {
265         long quadCount;
266         try (Stream<? extends Quad> stream = dataset.stream()) {
267             quadCount = stream.count();
268         }
269         assertTrue(quadCount > 0);
270 
271         try (Stream<? extends Quad> stream = dataset.stream()) {
272             assertTrue(stream.allMatch(t -> dataset.contains(t)));
273         }
274 
275         // Check exact count
276         Assume.assumeNotNull(bnode1, bnode2, aliceName, bobName, secretClubName, companyName, bobNameQuad);
277         assertEquals(10, quadCount);
278     }
279 
280     @Test
281     public void getQuadsQuery() throws Exception {
282 
283         try (Stream<? extends Quad> stream = dataset.stream(Optional.of(graph1), alice, null, null)) {
284             final long aliceCount = stream.count();
285             assertTrue(aliceCount > 0);
286             Assume.assumeNotNull(aliceName);
287             assertEquals(3, aliceCount);
288         }
289 
290         Assume.assumeNotNull(bnode1, bnode2, bobName, companyName, secretClubName);
291         try (Stream<? extends Quad> stream = dataset.stream(null, null, name, null)) {
292             assertEquals(4, stream.count());
293         }
294         Assume.assumeNotNull(bnode1);
295         try (Stream<? extends Quad> stream = dataset.stream(null, null, member, null)) {
296             assertEquals(3, stream.count());
297         }
298     }
299 
300     @Test
301     public void addBlankNodesFromMultipleDatasets() {
302             // Create two separate Dataset instances
303             final Dataset g1 = createDataset1();
304             final Dataset g2 = createDataset2();
305 
306             // and add them to a new Dataset g3
307             final Dataset g3 = factory.createDataset();
308             addAllQuads(g1, g3);
309             addAllQuads(g2, g3);
310 
311             // Let's make a map to find all those blank nodes after insertion
312             // (The Dataset implementation is not currently required to
313             // keep supporting those BlankNodes with contains() - see
314             // COMMONSRDF-15)
315 
316             final Map<String, BlankNodeOrIRI> whoIsWho = new ConcurrentHashMap<>();
317             // ConcurrentHashMap as we will try parallel forEach below,
318             // which should not give inconsistent results (it does with a
319             // HashMap!)
320 
321             // look up BlankNodes by name
322             final IRI name = factory.createIRI("http://xmlns.com/foaf/0.1/name");
323             try (Stream<? extends Quad> stream = g3.stream(null, null, name, null)) {
324                 stream.parallel().forEach(t -> whoIsWho.put(t.getObject().ntriplesString(), t.getSubject()));
325             }
326 
327             assertEquals(4, whoIsWho.size());
328             // and contains 4 unique values
329             assertEquals(4, new HashSet<>(whoIsWho.values()).size());
330 
331             final BlankNodeOrIRI b1Alice = whoIsWho.get("\"Alice\"");
332             assertNotNull(b1Alice);
333             final BlankNodeOrIRI b2Bob = whoIsWho.get("\"Bob\"");
334             assertNotNull(b2Bob);
335             final BlankNodeOrIRI b1Charlie = whoIsWho.get("\"Charlie\"");
336             assertNotNull(b1Charlie);
337             final BlankNodeOrIRI b2Dave = whoIsWho.get("\"Dave\"");
338             assertNotNull(b2Dave);
339 
340             // All blank nodes should differ
341             notEquals(b1Alice, b2Bob);
342             notEquals(b1Alice, b1Charlie);
343             notEquals(b1Alice, b2Dave);
344             notEquals(b2Bob, b1Charlie);
345             notEquals(b2Bob, b2Dave);
346             notEquals(b1Charlie, b2Dave);
347 
348             // And we should be able to query with them again
349             // as we got them back from g3
350             final IRI hasChild = factory.createIRI("http://example.com/hasChild");
351             // FIXME: Check graph2 BlankNode in these ..?
352             assertTrue(g3.contains(null, b1Alice, hasChild, b2Bob));
353             assertTrue(g3.contains(null, b2Dave, hasChild, b1Charlie));
354             // But not
355             assertFalse(g3.contains(null, b1Alice, hasChild, b1Alice));
356             assertFalse(g3.contains(null, b1Alice, hasChild, b1Charlie));
357             assertFalse(g3.contains(null, b1Alice, hasChild, b2Dave));
358             // nor
359             assertFalse(g3.contains(null, b2Dave, hasChild, b1Alice));
360             assertFalse(g3.contains(null, b2Dave, hasChild, b1Alice));
361 
362             // and these don't have any children (as far as we know)
363             assertFalse(g3.contains(null, b2Bob, hasChild, null));
364             assertFalse(g3.contains(null, b1Charlie, hasChild, null));
365     }
366 
367     private void notEquals(final BlankNodeOrIRI node1, final BlankNodeOrIRI node2) {
368         assertFalse(node1.equals(node2));
369         // in which case we should be able to assume
370         // (as they are in the same dataset)
371         assertFalse(node1.ntriplesString().equals(node2.ntriplesString()));
372     }
373 
374     /**
375      * Add all quads from the source to the target.
376      * <p>
377      * The quads may be copied in any order. No special conversion or
378      * adaptation of {@link BlankNode}s are performed.
379      *
380      * @param source
381      *            Source Dataset to copy quads from
382      * @param target
383      *            Target Dataset where quads will be added
384      */
385     private void addAllQuads(final Dataset source, final Dataset target) {
386 
387         // unordered() as we don't need to preserve quad order
388         // sequential() as we don't (currently) require target Dataset to be
389         // thread-safe
390 
391         try (Stream<? extends Quad> stream = source.stream()) {
392             stream.unordered().sequential().forEach(t -> target.add(t));
393         }
394     }
395 
396     /**
397      * Make a new dataset with two BlankNodes - each with a different
398      * uniqueReference
399      */
400     private Dataset createDataset1() {
401         final RDF factory1 = createFactory();
402 
403         final IRI name = factory1.createIRI("http://xmlns.com/foaf/0.1/name");
404         final Dataset g1 = factory1.createDataset();
405         final BlankNode b1 = createOwnBlankNode("b1", "0240eaaa-d33e-4fc0-a4f1-169d6ced3680");
406         g1.add(b1, b1, name, factory1.createLiteral("Alice"));
407 
408         final BlankNode b2 = createOwnBlankNode("b2", "9de7db45-0ce7-4b0f-a1ce-c9680ffcfd9f");
409         g1.add(b2, b2, name, factory1.createLiteral("Bob"));
410 
411         final IRI hasChild = factory1.createIRI("http://example.com/hasChild");
412         g1.add(null, b1, hasChild, b2);
413 
414         return g1;
415     }
416 
417     /**
418      * Create a different implementation of BlankNode to be tested with
419      * dataset.add(a,b,c); (the implementation may or may not then choose to
420      * translate such to its own instances)
421      * 
422      * @param name
423      * @return
424      */
425     private BlankNode createOwnBlankNode(final String name, final String uuid) {
426         return new BlankNode() {
427             @Override
428             public String ntriplesString() {                
429                 return "_: " + name;
430             }
431 
432             @Override
433             public String uniqueReference() {
434                 return uuid;
435             }
436 
437             @Override
438             public int hashCode() {
439                 return uuid.hashCode();
440             }
441 
442             @Override
443             public boolean equals(final Object obj) {
444                 if (!(obj instanceof BlankNode)) {
445                     return false;
446                 }
447                 final BlankNode other = (BlankNode) obj;
448                 return uuid.equals(other.uniqueReference());
449             }
450         };
451     }
452 
453     private Dataset createDataset2() {
454         final RDF factory2 = createFactory();
455         final IRI name = factory2.createIRI("http://xmlns.com/foaf/0.1/name");
456 
457         final Dataset g2 = factory2.createDataset();
458 
459         final BlankNode b1 = createOwnBlankNode("b1", "bc8d3e45-a08f-421d-85b3-c25b373abf87");
460         g2.add(b1, b1, name, factory2.createLiteral("Charlie"));
461 
462         final BlankNode b2 = createOwnBlankNode("b2", "2209097a-5078-4b03-801a-6a2d2f50d739");
463         g2.add(b2, b2, name, factory2.createLiteral("Dave"));
464 
465         final IRI hasChild = factory2.createIRI("http://example.com/hasChild");
466         // NOTE: Opposite direction of loadDataset1
467         g2.add(b2, b2, hasChild, b1);
468         return g2;
469     }
470     
471     /**
472      * Ensure {@link Dataset#getGraphNames()} contains our two graphs.
473      * 
474      * @throws Exception
475      *             If test fails
476      */
477     @Test
478     public void getGraphNames() throws Exception {
479         final Set<BlankNodeOrIRI> names = dataset.getGraphNames().collect(Collectors.toSet());        
480         assertTrue("Can't find graph name " + graph1, names.contains(graph1));
481         assertTrue("Found no quads in graph1", dataset.contains(Optional.of(graph1), null, null, null));
482         
483         final Optional<BlankNodeOrIRI> graphName2 = dataset.getGraphNames().filter(BlankNode.class::isInstance).findAny();
484         assertTrue("Could not find graph2-like BlankNode", graphName2.isPresent()); 
485         assertTrue("Found no quads in graph2", dataset.contains(graphName2, null, null, null));
486 
487         // Some implementations like Virtuoso might have additional internal graphs,
488         // so we can't assume this:
489         //assertEquals(2, names.size());
490     }
491     
492     @Test
493     public void getGraph() throws Exception {
494         final Graph defaultGraph = dataset.getGraph();
495         // TODO: Can we assume the default graph was empty before our new triples?
496         assertEquals(2, defaultGraph.size());
497         assertTrue(defaultGraph.contains(alice, isPrimaryTopicOf, graph1));
498         // NOTE: graph2 is a BlankNode
499         assertTrue(defaultGraph.contains(bob, isPrimaryTopicOf, null));
500     }
501 
502 
503     @Test
504     public void getGraphNull() throws Exception {
505         // Default graph should be present
506         final Graph defaultGraph = dataset.getGraph(null).get();
507         // TODO: Can we assume the default graph was empty before our new triples?
508         assertEquals(2, defaultGraph.size());
509         assertTrue(defaultGraph.contains(alice, isPrimaryTopicOf, graph1));
510         // NOTE: wildcard as graph2 is a (potentially mapped) BlankNode
511         assertTrue(defaultGraph.contains(bob, isPrimaryTopicOf, null));
512     }
513     
514 
515     @Test
516     public void getGraph1() throws Exception {
517         // graph1 should be present
518         final Graph g1 = dataset.getGraph(graph1).get();
519         assertEquals(4, g1.size());
520         
521         assertTrue(g1.contains(alice, name, aliceName));
522         assertTrue(g1.contains(alice, knows, bob));
523         assertTrue(g1.contains(alice, member, null));
524         assertTrue(g1.contains(null, name, secretClubName));
525     }
526 
527     @Test
528     public void getGraph2() throws Exception {
529         // graph2 should be present, even if is named by a BlankNode
530         // We'll look up the potentially mapped graph2 blanknode
531         final BlankNodeOrIRI graph2Name = (BlankNodeOrIRI) dataset.stream(Optional.empty(), bob, isPrimaryTopicOf, null)
532                 .map(Quad::getObject).findAny().get();
533         
534         final Graph g2 = dataset.getGraph(graph2Name).get();
535         assertEquals(4, g2.size());
536         final Triple bobNameTriple = bobNameQuad.asTriple();
537         assertTrue(g2.contains(bobNameTriple));
538         assertTrue(g2.contains(bob, member, bnode1));
539         assertTrue(g2.contains(bob, member, bnode2));
540         assertFalse(g2.contains(bnode1, name, secretClubName));
541         assertTrue(g2.contains(bnode2, name, companyName));
542     }
543     
544 
545     
546     /**
547      * An attempt to use the Java 8 streams to look up a more complicated query.
548      * <p>
549      * FYI, the equivalent SPARQL version (untested):
550      * 
551      * <pre>
552      *     SELECT ?orgName WHERE {
553      *             ?org foaf:name ?orgName .
554      *             ?alice foaf:member ?org .
555      *             ?bob foaf:member ?org .
556      *             ?alice foaf:knows ?bob .
557      *           FILTER NOT EXIST { ?bob foaf:knows ?alice }
558      *    }
559      * </pre>
560      *
561      * @throws Exception If test fails
562      */
563     @Test
564     public void whyJavaStreamsMightNotTakeOverFromSparql() throws Exception {
565         Assume.assumeNotNull(bnode1, bnode2, secretClubName);
566         // Find a secret organizations
567         try (Stream<? extends Quad> stream = dataset.stream(null, null, knows, null)) {
568             assertEquals("\"The Secret Club\"",
569                     // Find One-way "knows"
570                     stream.filter(t -> !dataset.contains(null, (BlankNodeOrIRI) t.getObject(), knows, t.getSubject()))
571                             .map(knowsQuad -> {
572                                 try (Stream<? extends Quad> memberOf = dataset
573                                         // and those they know, what are they
574                                         // member of?
575                                         .stream(null, (BlankNodeOrIRI) knowsQuad.getObject(), member, null)) {
576                                     return memberOf
577                                             // keep those which first-guy is a
578                                             // member of
579                                             .filter(memberQuad -> dataset.contains(null, knowsQuad.getSubject(), member,
580                                                     // First hit is good enough
581                                                     memberQuad.getObject()))
582                                             .findFirst().get().getObject();
583                                 }
584                             })
585                             // then look up the name of that org
586                             .map(org -> {
587                                 try (Stream<? extends Quad> orgName = dataset.stream(null, (BlankNodeOrIRI) org, name,
588                                         null)) {
589                                     return orgName.findFirst().get().getObject().ntriplesString();
590                                 }
591                             }).findFirst().get());
592         }
593     }
594 }