/* * (c) Copyright 2009 Hewlett-Packard Development Company, LP * All rights reserved. * [See end of file] */ package structure.radix; import java.nio.ByteBuffer ; import java.util.ArrayDeque ; import java.util.ArrayList ; import java.util.Deque ; import java.util.Iterator ; import java.util.NoSuchElementException ; import org.slf4j.Logger ; import org.slf4j.LoggerFactory ; import org.openjena.atlas.AtlasException ; import org.openjena.atlas.io.IndentedWriter ; import org.openjena.atlas.iterator.Iter ; import org.openjena.atlas.lib.Bytes ; /* http://en.wikipedia.org/wiki/Radix_tree */ public final class RadixTree { // TODO // More checking, all if'ed out // Iteration // Value? // Architecture : action visitor pattern - find&do (overhead?) static boolean logging = true ; static final boolean checking = true ; static final byte[] bytes0 = new byte[]{} ; static Logger log = LoggerFactory.getLogger(RadixTree.class) ; private RadixNode root = null ; public RadixNode getRoot() { return root ; } private interface Action { RadixNode exec(byte[] key, RadixNode node, int N, RadixNode parentNode) ; } /** Find the starting point - call the action */ private static RadixNode applicator(RadixNode root, byte[] key, Action action) { // A convoluted way to "return" with three results by calling on to a handler. // Assumes root is not null. RadixNode nodePrev = null ; // Last node. RadixNode node = root ; // The current node. for(;;) { // Does the prefix (partially) match? int N = node.countMatchPrefix(key) ; if ( N < node.prefix.length ) // Includes negative N return action.exec(key, node, N, nodePrev) ; // else matched up to end of prefix. // Longer or same length key. int j = node.locate(key, node.lenFinish) ; if ( j < 0 || j == node.nodes.size() ) // No match across subnodes - this node is the point of longest match. return action.exec(key, node, node.prefix.length, nodePrev) ; // There is a next node down to try. nodePrev = node ; node = node.nodes.get(j) ; } // Does not happen } private RadixNode _find(byte[] key) { RadixNode node = search(key) ; int N = node.countMatchPrefix(key) ; if ( N == node.prefix.length && node.lenFinish == key.length && node.isLeaf() ) // Exact match return node ; return null ; } public RadixNode find(byte[] key) { if ( root == null ) return null ; return applicator(root, key, findAction) ; } private static Action findAction = new Action() { @Override public RadixNode exec(byte[] key, RadixNode node, int N, RadixNode nodePrev) { if ( N == node.prefix.length && node.lenFinish == key.length && node.isLeaf() ) // Exact match return node ; return null ; } } ; private static Action identityAction = new Action() { @Override public RadixNode exec(byte[] key, RadixNode node, int N, RadixNode nodePrev) { return node ; } } ; private RadixNode search(byte[] key) { if ( root == null ) return null ; return applicator(root, key, identityAction) ; } // Return top changed node. private static Action insertAction = new Action() { @Override public RadixNode exec(byte[] key, RadixNode node, int N, RadixNode nodePrev) { if (logging && log.isDebugEnabled() ) { log.debug("insert: search => "+node) ; log.debug("insert N = "+N) ; } /* Cases: * Not a leaf node (tested above): * 0/ key same as prefix and it's a leaf => already there * 1/ key longer than prefix : N == node.prefix.length * 2/ key same as prefix : N == node.prefix.length but not already in tree. * 3/ key shorter than prefix : 0 <= N < node.prefix.length * 4/ key diverges from prefix : N < 0 * */ // Key already present - we ended at a leaf. if ( N == node.prefix.length && node.lenFinish == key.length && node.isLeaf() ) { if (logging && log.isDebugEnabled() ) log.debug("insert: Already present") ; return null ; } /* Actions * 1 => split, then as 3 ? * 2 => split, then as 3 ? * 3 => new subnode. * 4 => new subnode "" */ if ( N == node.prefix.length ) { // We will modify the node array if ( node.isLeaf() ) { node.nodes = new ArrayList() ; RadixNode n = new RadixNode(node) ; n.prefix = bytes0 ; n.lenStart = node.lenFinish ; n.lenFinish = node.lenFinish ; n.nodes = null ; node.nodes.add(0, n) ; } // Key ends here but it's not a leaf already byte[] prefixNew = Bytes.copyOf(key, node.lenFinish, key.length - node.lenFinish) ; if (logging && log.isDebugEnabled() ) log.debug("Prefix new : "+Bytes.asHex(prefixNew)) ; RadixNode node1 = new RadixNode(node) ; node1.prefix = prefixNew ; node1.lenStart = node.lenFinish ; node1.lenFinish = key.length ; node1.nodes = null ; // It's a leaf. int i = node.locate(prefixNew, 0) ; if ( i > 0 ) error("Duplicate start byte") ; i = -(i+1) ; node.nodes.add(i, node1) ; return node ; } // Key diverges or is shorter than prefix. byte[] prefix1 ; if ( N < 0 ) { // Short key. prefix1 = bytes0 ; N = -(N+1) ; } else prefix1 = Bytes.copyOf(key, N+node.lenStart) ; // The two parts of the prefix. byte[] prefix0 = Bytes.copyOf(node.prefix, 0, N) ; byte[] prefix2 = Bytes.copyOf(node.prefix, N) ; if (logging && log.isDebugEnabled() ) { log.debug("Prefix0 : "+Bytes.asHex(prefix0)) ; log.debug("Prefix1 : "+Bytes.asHex(prefix1)) ; log.debug("Prefix2 : "+Bytes.asHex(prefix2)) ; } // This is the new leaf due to key added. RadixNode node1 = new RadixNode(node) ; node1.prefix = prefix1 ; node1.lenStart = node.lenStart+N ; node1.lenFinish = key.length ; node1.nodes = null ; // ?? // This the tail of the original data RadixNode node2 = new RadixNode(node) ; node2.prefix = prefix2 ; node2.lenStart = node.lenStart+N ; node2.lenFinish = node.lenFinish ; node2.nodes = null ; // Swap if necessary if ( Bytes.compare(prefix1, prefix2) > 0 ) { RadixNode t = node2 ; node2 = node1 ; node1 = t ; } // Spread the original subnodes nodes over the new subnodes. if (node.nodes != null ) { int split = node.locate(node1.prefix, 0) ; if ( checking ) { if ( split > 0 ) error("Found prefix for new inserted key") ; } split = -(split+1) ; if ( split > 0 ) node1.nodes = new ArrayList() ; if ( split < node.nodes.size() ) node2.nodes = new ArrayList() ; if ( logging && log.isDebugEnabled() ) log.debug("Spread nodes: "+split) ; for ( int i = 0 ; i < split ; i++ ) { RadixNode x = node.nodes.get(i) ; // Fix the parents of subnodes. x.parentId = node1.id ; node1.nodes.add(x) ; } for ( int i = split ; i < node.nodes.size() ; i++ ) { RadixNode x = node.nodes.get(i) ; x.parentId = node2.id ; node2.nodes.add(x) ; } } // Alter the node in-place. node.prefix = prefix0 ; node.lenFinish = prefix0.length+node.lenStart ; if ( node.nodes == null ) node.nodes = new ArrayList() ; else node.nodes.clear() ; node.nodes.add(node1) ; node.nodes.add(node2) ; return node ; } } ; public boolean insert(byte[] key) { if ( root == null ) { root = new RadixNode(null) ; root.prefix = key ; root.lenStart = 0 ; root.lenFinish = key.length ; root.nodes = null ; return true ; } return applicator(root, key, insertAction) != null ; } /** Insert - return true if the trie changed */ private boolean _insert(byte[] key) { if (logging && log.isDebugEnabled() ) log.debug("Insert : "+Bytes.asHex(key)) ; key = Bytes.copyOf(key) ; // ??? Always copy later so no need - check if ( root == null ) { root = new RadixNode(null) ; root.prefix = key ; root.lenStart = 0 ; root.lenFinish = key.length ; root.nodes = null ; return true ; } // Location the node that leads to the key. // Returned node will have: // prefix longer than or equal to the key // final RadixNode node = search(key) ; if (logging && log.isDebugEnabled() ) log.debug("insert: search => "+node) ; int N = node.countMatchPrefix(key) ; if (logging && log.isDebugEnabled() ) log.debug("insert N = "+N) ; /* Cases: * Not a leaf node (tested above): * 0/ key same as prefix and it's a leaf => already there * 1/ key shorter than prefix : 0 <= N < node.prefix.length * 2/ key diverges from prefix : N < 0 * 3/ key longer than prefix : N == node.prefix.length * 4/ key same as prefix : N == node.prefix.length * but not already in tree. */ // Key already present - we ended at a leaf. if ( N == node.prefix.length && node.lenFinish == key.length && node.isLeaf() ) { if (logging && log.isDebugEnabled() ) log.debug("insert: Already present") ; return false ; } /* Actions * 1 => split, then as 3 ? * 2 => split, then as 3 ? * 3 => new subnode. * 4 => new subnode "" */ if ( N < node.prefix.length ) { // Cases 1 and 2. // Key diverges or is shorter byte[] prefix1 ; if ( N < 0 ) { // Short key. prefix1 = bytes0 ; N = -(N+1) ; } else prefix1 = Bytes.copyOf(key, N+node.lenStart) ; byte[] prefix0 = Bytes.copyOf(node.prefix, 0, N) ; // Remainder of prefix : Bytes.slice byte[] prefix2 = Bytes.copyOf(node.prefix, N) ; if (logging && log.isDebugEnabled() ) { log.debug("Prefix0 : "+Bytes.asHex(prefix0)) ; log.debug("Prefix1 : "+Bytes.asHex(prefix1)) ; log.debug("Prefix2 : "+Bytes.asHex(prefix2)) ; } // This is the new leaf due to key added. RadixNode node1 = new RadixNode(node) ; node1.prefix = prefix1 ; node1.lenStart = node.lenStart+N ; node1.lenFinish = key.length ; node1.nodes = null ; // ?? // This the tail of the original data RadixNode node2 = new RadixNode(node) ; node2.prefix = prefix2 ; node2.lenStart = node.lenStart+N ; node2.lenFinish = node.lenFinish ; node2.nodes = null ; // Swap if necessary if ( Bytes.compare(prefix1, prefix2) > 0 ) { RadixNode t = node2 ; node2 = node1 ; node1 = t ; } // Spread the original subnodes nodes over the new subnodes. if (node.nodes != null ) { int split = node.locate(node1.prefix, 0) ; if ( checking ) { if ( split > 0 ) error("Found prefix for new inserted key") ; } split = -(split+1) ; if ( split > 0 ) node1.nodes = new ArrayList() ; if ( split < node.nodes.size() ) node2.nodes = new ArrayList() ; if ( logging && log.isDebugEnabled() ) log.debug("Spread nodes: "+split) ; for ( int i = 0 ; i < split ; i++ ) { RadixNode x = node.nodes.get(i) ; x.parentId = node1.id ; node1.nodes.add(x) ; } for ( int i = split ; i < node.nodes.size() ; i++ ) { RadixNode x = node.nodes.get(i) ; x.parentId = node2.id ; node2.nodes.add(x) ; } } node.prefix = prefix0 ; node.lenFinish = prefix0.length+node.lenStart ; if ( node.nodes == null ) node.nodes = new ArrayList() ; else node.nodes.clear() ; node.nodes.add(node1) ; node.nodes.add(node2) ; return true ; } // We will modify the node array if ( node.isLeaf() ) { node.nodes = new ArrayList() ; RadixNode n = new RadixNode(node) ; n.prefix = bytes0 ; n.lenStart = node.lenFinish ; n.lenFinish = node.lenFinish ; n.nodes = null ; node.nodes.add(0, n) ; } byte[] prefixNew = Bytes.copyOf(key, node.lenFinish, key.length - node.lenFinish) ; if (logging && log.isDebugEnabled() ) log.debug("Prefix new : "+Bytes.asHex(prefixNew)) ; RadixNode node1 = new RadixNode(node) ; node1.prefix = prefixNew ; node1.lenStart = node.lenFinish ; node1.lenFinish = key.length ; node1.nodes = null ; // It's a leaf. int i = node.locate(prefixNew, 0) ; if ( i > 0 ) error("Duplicate start byte") ; i = -(i+1) ; node.nodes.add(i, node1) ; return true ; } private static Action deleteAction = new Action() { @Override public RadixNode exec(byte[] key, RadixNode leaf, int N, RadixNode node) { // Key not already present - not a full match (short, diverges) or not-a-leaf. if ( N != leaf.prefix.length || leaf.lenFinish != key.length || ! leaf.isLeaf() ) { if (logging && log.isDebugEnabled() ) log.debug("delete: Not present") ; return null ; } if (logging && log.isDebugEnabled() ) { log.debug("delete: "+Bytes.asHex(key)) ; log.debug(" "+leaf) ; log.debug(" "+node) ; } if ( node == null ) { // Root - deleting the last key. return leaf ; } // Then work on the parent. int i = node.locate(key, leaf.lenStart) ; if ( i < 0 ) error("Can't find child in parent") ; { RadixNode x = node.nodes.remove(i) ; if ( checking && x != leaf ) error("Removing the wrong node") ; } if ( node.nodes.size() == 1 ) { // This other node subnode. // Collapse n into node - that is, pull up all n into node. RadixNode n = node.nodes.remove(0) ; if ( logging && log.isDebugEnabled() ) { log.debug("Collapse node") ; log.debug(" node: "+node) ; log.debug(" n : "+n) ; } int len = node.prefix.length + n.prefix.length ; node.nodes = n.nodes ; // fix up children of n if ( node.nodes != null ) { for ( RadixNode x : node.nodes ) { x.parent = node ; x.parentId = node.id ; } } node.lenFinish = n.lenFinish ; byte [] a = new byte[len] ; System.arraycopy(node.prefix, 0, a, 0, node.prefix.length) ; System.arraycopy(n.prefix, 0, a, node.prefix.length, n.prefix.length) ; if ( logging && log.isDebugEnabled() ) log.debug("New prefix: "+Bytes.asHex(a)) ; node.prefix = a ; if ( logging && log.isDebugEnabled() ) log.debug(" --> : "+node) ; } return node ; } } ; /** Delete - return true if the tree changed (i.e the key was present and so was removed) */ public boolean delete(byte[] key) { if ( root == null ) return false ; boolean b = root.isLeaf() ; RadixNode n = applicator(root, key, deleteAction) ; if ( b && n != null ) root = null ; return n != null ; } public void print() { if ( root == null ) { System.out.println("") ; return ; } root.output(IndentedWriter.stdout) ; IndentedWriter.stdout.flush(); } public long size() { if ( root == null ) return 0 ; RadixNodeVisitor v = new RadixNodeVisitorBase() { int count = 0 ; @Override public void before(RadixNode node) { if (node.isLeaf()) count++ ; } @Override public Object result() { return count ; } } ; root.visit(v) ; return (Integer)v.result() ; } public Iterator iterator() { return iterator(null, null) ; } public Iterator iterator(byte[] start, byte[] finish) { if ( logging && log.isDebugEnabled() ) { log.debug("iterator: start: "+((start==null)?"null":Bytes.asHex(start))) ; log.debug("iterator: finish: "+((finish==null)?"null":Bytes.asHex(finish))) ; } if ( root == null ) { if ( logging && log.isDebugEnabled() ) log.debug("iterator: empty tree") ; return Iter.nullIterator() ; } return new RadixIterator(root, start, finish) ; } private static class RadixIterator implements Iterator { RadixNode node ; ByteBuffer slot = null ; byte[] finish = null ; RadixIterator(RadixNode root, byte[] start, byte[] finish) { this.finish = finish ; node = root ; int N = -1 ; if ( start != null ) { // Find .... slot = ByteBuffer.allocate(start.length) ; for(;;) { // Does the prefix (partially) match? N = node.countMatchPrefix(start) ; int numMatch = N ; if ( numMatch < 0 ) numMatch = -(numMatch+1) ; // else matched up to end of prefix. // Copy all bytes that match. slot = appendBytes(node.prefix, 0, numMatch, slot) ; if ( N < node.prefix.length ) // Includes negative N break ; // Longer or same length key. int j = node.locate(start, node.lenFinish) ; if ( j < 0 || j == node.nodes.size() ) // No match across subnodes - this node is the point of longest match. break ; // There is a next node down to try. node = node.nodes.get(j) ; } } else slot = ByteBuffer.allocate(10) ; //Reallocating? // Now move until leaf. while(!node.isLeaf()) { // Copy as we got. slot = appendBytes(node.prefix, 0, node.prefix.length, slot) ; node = node.nodes.get(0) ; } // But we need to track the key for copying reasons. if ( logging && log.isDebugEnabled() ) { log.debug("initial: "+node) ; log.debug("initial: "+Bytes.asHex(slot.array())) ; } } static ByteBuffer appendBytes(byte[] array, int start, int length, ByteBuffer bb) { if ( bb.position()+length > bb.capacity() ) { ByteBuffer bb2 = ByteBuffer.allocate(bb.capacity()*2 ) ; System.arraycopy(bb.array(), 0, bb2.array(), 0, bb.position()) ; } // System.arraycopy(bb.array(), bb.position(), array, 0, length) ; // bb.position((bb.position()+length)) ; bb.put(array, start, length) ; return bb ; } @Override public boolean hasNext() { if ( slot != null ) return true ; // Move to next leaf. // TODO return false ; } @Override public byte[] next() { if ( ! hasNext() ) throw new NoSuchElementException() ; byte[] x = slot.array() ; slot = null ; return x ; } @Override public void remove() { throw new UnsupportedOperationException() ; } } public void printLeaves() { if ( root == null ) { System.out.println("Tree: empty") ; return ; } final Deque x = new ArrayDeque() ; RadixNodeVisitor v = new RadixNodeVisitorBase() { @Override public void before(RadixNode node) { for ( byte b : node.prefix ) x.addLast(b) ; if ( node.isLeaf() ) System.out.print(" "+x) ; } @Override public void after(RadixNode node) { for ( int i = node.lenStart ; i < node.lenFinish ; i++ ) x.removeLast() ; } } ; root.visit(v) ; System.out.println() ; System.out.flush() ; } static void error(String string) { throw new AtlasException(string) ; } public void check() { if ( root != null ) root.check() ; } } /* * (c) Copyright 2009 Hewlett-Packard Development Company, LP * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */