/* * (c) Copyright 2004, 2005, 2006, 2007 Hewlett-Packard Development Company, LP * [See end of file] */ package arqo; import java.util.* ; import com.hp.hpl.jena.graph.Node; import com.hp.hpl.jena.graph.Triple; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.StmtIterator; import com.hp.hpl.jena.rdf.model.Statement; import com.hp.hpl.jena.util.FileManager; import com.hp.hpl.jena.vocabulary.OWL; import com.hp.hpl.jena.vocabulary.RDF; import com.hp.hpl.jena.vocabulary.RDFS; import com.hp.hpl.jena.sparql.core.BasicPattern; import com.hp.hpl.jena.sparql.engine.optimizer.core.BasicPatternJoin; import com.hp.hpl.jena.sparql.engine.optimizer.probability.ProbabilityFactory; import com.hp.hpl.jena.sparql.engine.optimizer.probability.Probability; /** * * * @author Markus Stocker * @version $Id$ */ public class accuracy { private static String inGraphFileName = null ; private static String inIndexFileName = null ; private static boolean distinctPredicates = false ; private static boolean ignoreEmptyResultSet = false ; private static int runs = 20 ; private static int run = 0 ; private static int triples = 1 ; private static List subjects = new ArrayList() ; // List private static List predicates = new ArrayList() ; // List private static List objects = new ArrayList() ; // List /** * Main program * * @param args */ public static void main(String[] args) { List patterns = new ArrayList() ; // List List probabilities = new ArrayList() ; // List List selectivities = new ArrayList() ; // List try { readCmdParams(args) ; System.out.println("Data: " + inGraphFileName) ; System.out.println("Index: " + inIndexFileName) ; Model dataModel = FileManager.get().loadModel(inGraphFileName) ; Model indexModel = FileManager.get().loadModel(inIndexFileName) ; Probability probability = ProbabilityFactory.loadDefaultModel(dataModel, indexModel) ; // The the sets for SPO splitStatements(dataModel) ; int numOfSubjects = subjects.size() ; int numOfPredicates = predicates.size() ; int numOfObjects = objects.size() ; Random rand = new Random() ; while (run < runs) { double p, s ; Triple triple1 = null , triple2 = null ; Node subject1 = null, predicate1 = null, object1 = null, subject2 = null, predicate2 = null, object2 = null ; // Decide whether the nodes should be bound or unbound subject1 = boundOrUnbound((Node)subjects.get(rand.nextInt(numOfSubjects))) ; predicate1 = boundOrUnbound((Node)predicates.get(rand.nextInt(numOfPredicates))) ; object1 = boundOrUnbound((Node)objects.get(rand.nextInt(numOfObjects))) ; if (triples == 2) { // Only in the case of joined triple patterns subject2 = boundOrUnbound((Node)subjects.get(rand.nextInt(numOfSubjects))) ; predicate2 = boundOrUnbound((Node)predicates.get(rand.nextInt(numOfPredicates))) ; object2 = boundOrUnbound((Node)objects.get(rand.nextInt(numOfObjects))) ; if (distinctPredicates) { predicate1 = (Node)predicates.get(rand.nextInt(numOfPredicates)) ; predicate2 = (Node)predicates.get(rand.nextInt(numOfPredicates)) ; while (predicate1.equals(predicate2)) { predicate1 = (Node)predicates.get(rand.nextInt(numOfPredicates)) ; predicate2 = (Node)predicates.get(rand.nextInt(numOfPredicates)) ; } } } BasicPattern pattern = new BasicPattern() ; if (triples == 1) { triple1 = new Triple(subject1, predicate1, object1) ; pattern.add(triple1) ; p = probability.getProbability(triple1) ; s = probability.getSelectivity(triple1) ; } else { pattern.add(new Triple(subject1, predicate1, object1)) ; pattern.add(new Triple(subject2, predicate2, object2)) ; pattern = randJoinType(pattern) ; triple1 = pattern.get(0) ; triple2 = pattern.get(1) ; p = probability.getProbability(triple1, triple2) ; s = probability.getSelectivity(triple1, triple2) ; } if (!(ignoreEmptyResultSet && s == 0)) { probabilities.add(new Double(p)) ; selectivities.add(new Double(s)) ; patterns.add(pattern) ; run++ ; } } for (Iterator iter = probabilities.iterator(); iter.hasNext(); ) { System.out.println((Double)iter.next()) ; } System.out.println() ; for (Iterator iter = selectivities.iterator(); iter.hasNext(); ) { System.out.println((Double)iter.next()) ; } System.out.println() ; for (Iterator iter = patterns.iterator(); iter.hasNext(); ) { System.out.println((BasicPattern)iter.next()) ; } } catch (Exception e) { e.printStackTrace() ; } } private static BasicPattern randJoinType(BasicPattern pattern) { Random rand = new Random() ; int nextRand = rand.nextInt(4) ; BasicPattern p = new BasicPattern() ; Triple triple1 = pattern.get(0) ; Triple triple2 = pattern.get(1) ; Triple t1 = null, t2 = null ; if (! BasicPatternJoin.isJoined(triple1, triple2)) { while (nextRand == 2 && !triple1.getObject().isURI()) { // For the join OS, the object of the first triple has to be a URI nextRand = rand.nextInt(4) ; } t1 = triple1 ; if (nextRand == 0) // SS join t2 = new Triple(triple1.getSubject(), triple2.getPredicate(), triple2.getObject()) ; else if (nextRand == 1) // SO join t2 = new Triple(triple2.getSubject(), triple2.getPredicate(), triple1.getSubject()) ; else if (nextRand == 2) // OS join t2 = new Triple(triple1.getObject(), triple2.getPredicate(), triple2.getObject()) ; else // OO join t2 = new Triple(triple2.getSubject(), triple2.getPredicate(), triple1.getObject()) ; } else { t1 = triple1 ; t2 = triple2 ; } p.add(t1) ; p.add(t2) ; return p ; } private static Node boundOrUnbound(Node node) { Random rand = new Random() ; if (rand.nextInt(2) == 0) return node ; return Node.createVariable("x" + Math.abs(rand.nextInt())) ; } private static void splitStatements(Model dataModel) { Set s = new HashSet() ; // Set Set p = new HashSet() ; // Set Set o = new HashSet() ; // Set StmtIterator stmtIter = dataModel.listStatements() ; while (stmtIter.hasNext()) { Statement stmt = (Statement)stmtIter.nextStatement() ; Node subject = stmt.getSubject().asNode() ; Node predicate = stmt.getPredicate().asNode() ; Node object = stmt.getObject().asNode() ; s.add(subject) ; if (! exclude(predicate)) p.add(predicate) ; o.add(object) ; } subjects.addAll(s) ; predicates.addAll(p) ; objects.addAll(o) ; } // Read the command line params private static void readCmdParams(String[] args) throws Exception { for (int i = 0; i < args.length; i++) { if (args[i].equals("--graph")) inGraphFileName = args[i+1] ; else if (args[i].equals("--index")) inIndexFileName = args[i+1] ; else if (args[i].equals("--distinct-predicates")) distinctPredicates = true ; else if (args[i].equals("--runs")) runs = new Integer(args[i+1]).intValue() ; else if (args[i].equals("--triples")) triples = new Integer(args[i+1]).intValue() ; else if (args[i].equals("--ignore-empty-rs")) ignoreEmptyResultSet = true ; else if (args[i].equals("--help")) usage() ; } if (inGraphFileName == null) usage() ; } // Print the usage of the main program private static void usage() { String usage = "arqo.accuracy [options]\n" ; usage += "--graph [file name]\n" ; usage += "--index [file name]\n" ; usage += "--distinct-predicates\n" ; usage += "--runs [default 100]\n" ; usage += "--triples [1 or 2]\n" ; usage += "--ignore-empty-rs\n" ; System.out.println(usage) ; System.exit(0) ; } private static boolean exclude(Node predicate) { String uri = predicate.getURI() ; if (uri.contains(RDF.getURI()) || uri.contains(RDFS.getURI()) || uri.contains(OWL.getURI())) return true ; return false ; } } /* * (c) Copyright 2004, 2005, 2006, 2007 Hewlett-Packard Development Company, LP * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */