JSDoc: Source: oak-mongo.js

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
/*global print, _, db, Object, ObjectId */

/** @namespace */
var oak = (function(global){
    "use strict";

    // Root object of the helper API: a callable that prints a short banner.
    var api = function () {
        print("Oak Mongo Helpers");
    };

    /**
     * Prints all ids of documents in the nodes collection that contain changes
     * with the given revisions. Example:
     * <p>
     * <pre>oak.changesForRevisions({'r16d63f52ff7-0-1':1, 'r16d63f5b605-0-1':1})</pre>
     * <p>
     * Caution: this method scans the entire nodes collection and will most
     * likely impact performance of the application using the database. Do
     * <b>NOT</b> run this method on a production system!
     *
     * @memberof oak
     * @method oak.changesForRevisions
     */
    api.changesForRevisions = function(revs) {
        revs = revs || {};
        // Only the object's own keys are revision ids; inherited keys are ignored.
        var wanted = Object.keys(revs);
        db.nodes.find({}, {_id:1,_revisions:1, _commitRoot:1}).forEach(function(doc) {
            // A document matches when any requested revision shows up in either
            // its _revisions or its _commitRoot map.
            var matches = wanted.some(function(r) {
                return Boolean(doc._revisions && doc._revisions[r] || doc._commitRoot && doc._commitRoot[r]);
            });
            // Print every id only once, even when several revisions match.
            if (matches) {
                print(doc._id);
            }
        });
    };

    /**
     * Collects various stats related to Oak usage of Mongo.
     *
     * @memberof oak
     * @method oak.systemStats
     * @returns {object} system stats.
     */
    api.systemStats = function () {
        // value handed to stats() for both collections
        var scale = 1024 * 1024;
        // ids that start with "2:/oak:index/"
        var oakIndexIds = /^2\:\/oak\:index\//;
        return {
            nodeStats: db.nodes.stats(scale),
            blobStats: db.blobs.stats(scale),
            clusterStats: db.clusterNodes.find().toArray(),
            oakIndexes: db.nodes.find({'_id': oakIndexIds}).toArray(),
            hostInfo: db.hostInfo(),
            rootDoc: db.nodes.findOne({'_id' : '0:/'})
        };
    };

    /**
     * Collects various stats related to Oak indexes stored under /oak:index.
     *
     * @memberof oak
     * @method indexStats
     * @returns {Array} index stats.
     */
    api.indexStats = function () {
        var entries = [];
        // running totals over all index documents, appended as the last entry
        var summary = {id: "summary", count: 0, size: 0};
        var indexDocs = db.nodes.find({'_id': /^2\:\/oak\:index\//}, {_id: 1});
        indexDocs.forEach(function (indexDoc) {
            var childStats = api.getChildStats(api.pathFromId(indexDoc._id));
            childStats.id = indexDoc._id;
            entries.push(childStats);

            summary.count += childStats.count;
            summary.size += childStats.size;
        });

        summary.simple = humanFileSize(summary.size);
        entries.push(summary);
        return entries;
    };

    /**
     * Determines the number of child nodes (including the whole sub tree)
     * for a given parent node path. This would be faster compared to
     * {@link getChildStats} as it does not load the docs and works on
     * the index only.
     *
     * Note that there might be some difference between db.nodes.count()
     * and countChildren('/') as split docs, intermediate docs are not
     * accounted for
     *
     * @memberof oak
     * @method countChildren
     * @param {string} path the path of a node.
     * @returns {number} the number of children, including all descendant nodes.
     */
    api.countChildren = function(path){
        if (path === undefined) {
            return 0;
        }
        // every path except "/" gets a trailing slash before it is used as prefix
        var prefix = (path != "/") ? path + "/" : path;
        var total = 0;
        var levelCount;
        // count level by level and stop at the first level without documents
        for (var level = pathDepth(prefix); ; level++) {
            levelCount = db.nodes.count({_id: pathFilter(level, prefix)});
            if (levelCount === 0) {
                return total;
            }
            total += levelCount;
        }
    };

    /**
     * Determines the number of direct child nodes (excluding any sub tree)
     * for a given parent node path.
     *
     * @memberof oak
     * @method countDirectChildren
     * @param {string} path the path of a node.
     * @returns {number} the number of direct children; nodes further down the
     *          hierarchy are not counted. Returns 0 when no path is given.
     */
    api.countDirectChildren = function(path){
        if (path === undefined) {
            return 0;
        }
        // Children are stored exactly one level below the node itself, so the
        // depth has to be derived from the path before the trailing slash is
        // appended (the same computation listChildren uses).
        var childDepth = pathDepth(path) + 1;
        var prefix = (path == "/") ? path : path + "/";
        return db.nodes.count({_id: pathFilter(childDepth, prefix)});
    };

    /**
     * Provides stats related to number of child nodes
     * below given path or total size taken by such nodes.
     *
     * @memberof oak
     * @method getChildStats
     * @param {string} path the path of a node.
     * @returns {{count: number, size: number}} statistics about the child nodes
     *          including all descendants.
     */
    api.getChildStats = function(path){
        var totals = {"count" : 0, "size" : 0};
        // visit every document below the path and add up count and BSON size
        this.forEachChild(path, function(childDoc){
            totals.count += 1;
            totals.size += Object.bsonsize(childDoc);
        });
        totals.simple = humanFileSize(totals.size);
        return totals;
    };

    /**
     * Performs a breadth first traversal for nodes under given path
     * and invokes the passed function for each child node.
     *
     * @memberof oak
     * @method forEachChild
     * @param {string} path the path of a node.
     * @param callable a function to be called for each child node including all
     *        descendant nodes. The MongoDB document is passed as the single
     *        parameter of the function.
     */
    api.forEachChild = function(path, callable) {
        var prefix = path;
        if (path !== undefined && path != "/") {
            prefix = path + "/";
        }

        var level = pathDepth(prefix);
        var cursor = db.nodes.find({_id: pathFilter(level, prefix)});
        // go one level deeper after each pass; the first empty level ends the walk
        while (cursor.hasNext()) {
            cursor.forEach(callable);
            level++;
            cursor = db.nodes.find({_id: pathFilter(level, prefix)});
        }
    };

    /**
     * Returns the path part of the given id.
     *
     * @memberof oak
     * @method pathFromId
     * @param {string} id the id of a Document in the nodes collection.
     * @returns {string} the path derived from the id.
     */
    api.pathFromId = function(id) {
        // the path is everything after the first ':' of the id
        var separatorPos = id.indexOf(':');
        return id.slice(separatorPos + 1);
    };

    /**
     * Checks the _lastRev for a given clusterId. The checks starts with the
     * given path and walks up to the root node.
     *
     * @memberof oak
     * @method checkLastRevs
     * @param {string} path the path of a node to check
     * @param {number} clusterId the id of an oak cluster node.
     * @returns {object} the result of the check.
     */
    api.checkLastRevs = function(path, clusterId) {
        // The shared helper gets true here and false from fixLastRevs;
        // presumably the flag selects check-only mode (helper not shown here).
        var checkOnly = true;
        return checkOrFixLastRevs(path, clusterId, checkOnly);
    };

    /**
     * Fixes the _lastRev for a given clusterId. The fix starts with the
     * given path and walks up to the root node.
     *
     * @memberof oak
     * @method fixLastRevs
     * @param {string} path the path of a node to fix
     * @param {number} clusterId the id of an oak cluster node.
     * @returns {object} the result of the fix.
     */
    api.fixLastRevs = function(path, clusterId) {
        // The shared helper gets false here and true from checkLastRevs;
        // presumably false lets it apply the fix (helper not shown here).
        var checkOnly = false;
        return checkOrFixLastRevs(path, clusterId, checkOnly);
    };

    /**
     * Returns statistics about the blobs collection in the current database.
     * The stats include the combined BSON size of all documents. The time to
     * run this command therefore heavily depends on the size of the collection.
     *
     * @memberof oak
     * @method blobStats
     * @returns {object} statistics about the blobs collection.
     */
    api.blobStats = function() {
        var mb = 1024 * 1024;
        var collStats = db.blobs.stats(mb);
        // full scan: add up the BSON size of every document in the collection
        var totalBsonBytes = 0;
        db.blobs.find().forEach(function(blob){
            totalBsonBytes += Object.bsonsize(blob);
        });
        return {
            count: collStats.count,
            size: collStats.size,
            storageSize: collStats.storageSize,
            bsonSize: Math.round(totalBsonBytes / mb),
            indexSize: collStats.totalIndexSize
        };
    };

    /**
     * Finds and dumps the _id of all documents whose size exceeds the size
     * limit (options.sizeLimit, 15MB by default). It also prints progress info
     * after every 10k docs.
     *
     * The ids can be found by grepping for '^id|' pattern
     *
     * > oak.dumpLargeDocIds({db: "aem-author"})
     *
     * @param {object} options pass optional parameters for host, port, db, and filename
     */
    api.dumpLargeDocIds = function (options) {
        options = options || {};
        var sizeLimit = options.sizeLimit || 15 * 1024 * 1024;
        var traversed = 0;
        var largeIds = [];
        print("Using size limit: " +  sizeLimit);
        db.nodes.find().forEach(function (doc) {
            if (Object.bsonsize(doc) > sizeLimit) {
                print("id|" + doc._id);
                largeIds.push(doc._id);
            }
            traversed += 1;
            // progress marker every 10000 documents
            if (traversed % 10000 === 0) {
                print("Traversed #" + traversed);
            }
        });

        print("Number of large documents : " + largeIds.length);

        if (largeIds.length === 0) {
            return;
        }
        // Print an export command that selects all the large docs found above.
        var query = JSON.stringify({_id: {$in: largeIds}});
        print("Using following export command to tweak the output");

        // the current database name always replaces any db passed in options
        options.db = db.getName();
        print(createExportCommand(query, options));
    };

    /**
     * Converts the given Revision String into a more human readable version,
     * which also prints the date.
     *
     * @memberof oak
     * @method formatRevision
     * @param {string} rev a revision string.
     * @returns {string} a human readable string representation of the revision.
     */
    api.formatRevision = function(rev) {
        // parse the revision string, then let the Revision render itself
        var parsed = new Revision(rev);
        return parsed.toReadableString();
    };

    /**
     * Removes the complete subtree rooted at the given path.
     *
     * @memberof oak
     * @method removeDescendantsAndSelf
     * @param {string} path the path of the subtree to remove.
     */
    api.removeDescendantsAndSelf = function(path) {
        var removed = 0;
        var level = pathDepth(path);
        var lastDeleted;
        // the node itself, addressed by its regular id
        removed += db.nodes.deleteMany({_id: level + ":" + path}).deletedCount;
        // the node itself, in case it is stored as a long path
        removed += db.nodes.deleteMany(longPathQuery(path)).deletedCount;

        // descendants: per level remove long path documents first, then the
        // regular ones; stop once a level had no regular documents to delete
        var descendantPrefix = path + "/";
        level++;
        do {
            removed += db.nodes.deleteMany(longPathFilter(level, descendantPrefix)).deletedCount;
            lastDeleted = db.nodes.deleteMany({_id: pathFilter(level, descendantPrefix)}).deletedCount;
            removed += lastDeleted;
            level++;
        } while (lastDeleted !== 0);

        // long path descendants further down the hierarchy
        for (;;) {
            lastDeleted = db.nodes.deleteMany(longPathFilter(level++, descendantPrefix)).deletedCount;
            if (lastDeleted === 0) {
                break;
            }
            removed += lastDeleted;
        }
        return {deletedCount : removed};
    };

    /**
     * Helper method to find nodes based on Regular Expression.
     *
     * @memberof oak
     * @method regexFind
     * @param {string} pattern the pattern to match the nodes.
     */
    api.regexFind = function(pattern) {
        var matcher = {$regex: pattern};
        // first print the cursor of the plain query, then every matching id
        print(db.nodes.find({_id: matcher}));
        var idsOnly = db.nodes.find({_id: matcher}, {_id: 1});
        idsOnly.forEach(function(match) {
            print(match._id);
        });
    };

    /**
     * Remove the complete subtree of all the nodes matching a regex pattern.
     * Use regexFind to find the nodes that match the pattern prior deletion.
     *
     * @memberof oak
     * @method removeDescendantsAndSelfMatching
     * @param {string} pattern the pattern to match the nodes to be removed.
     */
    api.removeDescendantsAndSelfMatching = function(pattern) {
        var total = 0;
        var matches = db.nodes.find({_id: {$regex: pattern}}, {_id: 1});
        // remove the subtree of every matching document and report per match
        matches.forEach(function(match) {
            print("Removing " + match._id + " and its children");
            var removed = api.removeDescendantsAndSelf(api.pathFromId(match._id)).deletedCount;
            total += removed;
            print("nRemoved : " + removed);
        });
        print("Total removed : " + total);
    };

    /**
     * Wrapper function to clean all the /tmpXXXXXX nodes from the repository.
     *
     * @memberof oak
     * @method removeRootTempNodes
     */
    api.removeRootTempNodes = function() {
        // ids starting with "1:/tmp" followed by at least one more character
        var rootTempPattern = "^1:/tmp.+";
        this.removeDescendantsAndSelfMatching(rootTempPattern);
    };

    /**
     * List all the nodes under /tmpXXXXXX.
     *
     * @memberof oak
     * @method listRootTempNodes
     */
    api.listRootTempNodes = function() {
        // ids starting with "1:/tmp" followed by at least one more character
        var rootTempPattern = "^1:/tmp.+";
        this.regexFind(rootTempPattern);
    };

    /**
     * List all checkpoints.
     *
     * @memberof oak
     * @method listCheckpoints
     * @returns {object} all checkpoints
     */
    api.listCheckpoints = function() {
        var checkpointDoc = db.settings.findOne({_id:"checkpoint"});
        if (checkpointDoc == null) {
            print("No checkpoint document found.");
            return;
        }
        var entries = checkpointDoc.data;
        var checkpoints = {};
        for (var revStr in entries) {
            var revision = new Revision(revStr);
            var raw = entries[revStr];
            // a value is either a JSON object with an "expires" field or the
            // bare expiry value itself
            var expires = (raw.charAt(0) == '{') ? JSON.parse(raw)["expires"] : raw;
            checkpoints[revStr] = {
                created: revision.asDate(),
                expires: new Date(parseInt(expires, 10))
            };
        }
        return checkpoints;
    };

    /**
     * Removes all checkpoints older than a given Revision.
     *
     * @memberof oak
     * @method removeCheckpointsOlderThan
     * @param {string} rev checkpoints older than this revision are removed.
     * @returns {object} the result of the MongoDB update.
     */
    api.removeCheckpointsOlderThan = function(rev) {
        if (rev === undefined) {
            print("No revision specified");
            return;
        }
        var threshold = new Revision(rev);
        var toUnset = {};
        var checkpoints = api.listCheckpoints();
        var matched = 0;
        // collect every checkpoint the given revision is newer than
        for (var cp in checkpoints) {
            if (threshold.isNewerThan(new Revision(cp))) {
                toUnset["data." + cp] = "";
                matched += 1;
            }
        }
        if (matched === 0) {
            print("No checkpoint older than " + rev);
            return;
        }
        // one update: bump _modCount and unset all collected entries
        var update = {
            "$inc": {_modCount: NumberLong(1)},
            "$unset": toUnset
        };
        return db.settings.update({_id:"checkpoint"}, update);
    };

    /**
     * Removes all collision markers on the document with the given path and
     * clusterId. This method will only remove collisions when the clusterId
     * is inactive.
     * This corresponds to DocumentNodeStore.cleanRootCollisions(), which is
     * part of a startup and normal background update.
     *
     * @memberof oak
     * @method removeCollisions
     * @param {string} path the path of a document
     * @param {number} clusterId collision markers for this clusterId will be removed.
     * @param {number} [limit=1000000] maximum number of collision markers to remove.
     * @returns {object} the result of the MongoDB update.
     */
    api.removeCollisions = function(path, clusterId, limit) {
        if (path === undefined) {
            print("No path specified");
            return;
        }
        if (clusterId === undefined) {
            print("No clusterId specified");
            return;
        }
        var maxMarkers = (limit === undefined) ? 1000000 : limit;
        // never touch markers of a cluster node that is still flagged ACTIVE
        var owner = db.clusterNodes.findOne({_id: clusterId.toString()});
        if (owner && owner.state == "ACTIVE") {
            print("Cluster node with id " + clusterId + " is active!");
            print("Can only remove collisions for inactive cluster node.");
            return;
        }

        var doc = this.findOne(path);
        if (!doc) {
            print("No document for path: " + path);
            return;
        }
        // collect the markers written by the given clusterId, up to the limit
        var markers = {};
        var selected = 0;
        for (var rev in doc._collisions) {
            if (new Revision(rev).getClusterId() == clusterId) {
                markers["_collisions." + rev] = "";
                selected += 1;
            }
            if (selected >= maxMarkers) {
                break;
            }
        }
        if (selected === 0) {
            print("No collisions found for clusterId " + clusterId);
            return;
        }
        var update = {
            "$inc": {_modCount: NumberLong(1)},
            "$unset": markers
        };
        print("Removing " + selected + " collisions for clusterId " + clusterId);
        return db.nodes.update({_id: pathDepth(path) + ":" + path}, update);
    };

    /**
     * Removes all unmerged branches on the document with the given path and
     * clusterId. This method will only remove unmerged branches when the
     * clusterId is inactive.
     * This corresponds to DocumentNodeStore.cleanOrphanedBranches(), which is
     * part of a startup and normal background update.
     *
     * @memberof oak
     * @method removeUnmergedBranches
     * @param {string} path the path of a document
     * @param {number} clusterId unmerged branches for this clusterId will be removed.
     * @param {number} [limit=1000000] maximum number of unmerged branches to remove.
     * @returns {object} the result of the MongoDB update.
     */
    api.removeUnmergedBranches = function(path, clusterId, limit) {
        if (path === undefined) {
            print("No path specified");
            return;
        }
        if (clusterId === undefined) {
            print("No clusterId specified");
            return;
        }
        var maxBranches = (limit === undefined) ? 1000000 : limit;
        // never touch entries of a cluster node that is still flagged ACTIVE
        var owner = db.clusterNodes.findOne({_id: clusterId.toString()});
        if (owner && owner.state == "ACTIVE") {
            print("Cluster node with id " + clusterId + " is active!");
            print("Can only remove unmerged branches for inactive cluster node.");
            return;
        }

        var doc = this.findOne(path);
        if (!doc) {
            print("No document for path: " + path);
            return;
        }
        var toUnset = {};
        var selected = 0;
        for (var rev in doc._revisions) {
            if (new Revision(rev).getClusterId() == clusterId) {
                // a _revisions value starting with "br" is removed together
                // with the _bc entry of the same revision
                if (doc._revisions[rev].startsWith("br")) {
                    toUnset["_revisions." + rev] = "";
                    toUnset["_bc." + rev] = "";
                    selected += 1;
                }
                if (selected >= maxBranches) {
                    break;
                }
            }
        }
        if (selected === 0) {
            print("No unmerged branches found for clusterId " + clusterId);
            return;
        }
        var update = {
            "$inc": {_modCount: NumberLong(1)},
            "$unset": toUnset
        };
        print("Removing " + selected + " unmerged branches for clusterId " + clusterId);
        return db.nodes.update({_id: pathDepth(path) + ":" + path}, update);
    };

    /**
     * Prints the BSON size of every property of the document at the given
     * path whose value is a nested BSON object.
     * This is useful for large documents to quickly see which property/ies are affected
     *
     * @memberof oak
     * @method propertySizes
     * @param {string} path the path of a document
     * @param {number} [sizeLargerThan=1] only show properties larger than this
     */
    api.propertySizes = function(path, sizeLargerThan) {
        if (path === undefined) {
            print("No path specified");
            return;
        }
        // loose comparison on purpose: null and undefined both select the default
        if (sizeLargerThan == undefined) {
            sizeLargerThan = 1;
        }
        print("loading document at " + path);
        var doc = this.findOne(path);
        if (!doc) {
            print("No document for path: " + path);
            return;
        }
        print("overall size : " + Object.bsonsize(doc));
        for (var prop in doc) {
            // only the document's own properties are reported; anything
            // inherited through the prototype chain is skipped
            if (prop == "_id" || !Object.prototype.hasOwnProperty.call(doc, prop)) {
                continue;
            }
            var subdoc = doc[prop];
            // values that are not nested BSON objects are not measured
            if (Object.prototype.toString.call(subdoc) != "[object BSON]") {
                continue;
            }
            var subdocsize = Object.bsonsize(subdoc);
            if (subdocsize <= sizeLargerThan) {
                continue;
            }
            print(" - property " + prop + " size : " + subdocsize);
        }
    };

    /**
     * Prints the count of property revisions by clusterId
     * The output is using the pseudo-revision format used elsewhere already
     * where timestamp and count are 0 - mainly to point out the clusterId it belongs to
     * This is useful for large documents to quickly see which clusterId was the
     * most frequent writer.
     *
     * @memberof oak
     * @method propertyClusterIdCounts
     * @param {string} path the path of a document
     */
    api.propertyClusterIdCounts = function(path) {
        if (path === undefined) {
            print("No path specified");
            return;
        }
        print("loading document at " + path);
        var doc = this.findOne(path);
        if (!doc) {
            print("No document for path: " + path);
            return;
        }
        var countsByProperty = {};
        for (var prop in doc) {
            if (prop == "_id") {
                continue;
            }
            var revisions = doc[prop];
            // created lazily so properties without own keys do not show up
            var perCluster;
            perCluster = undefined;
            for (var rev in revisions) {
                if (!revisions.hasOwnProperty(rev)) {
                    continue;
                }
                // pseudo-revision key: timestamp and counter zeroed, clusterId kept
                var key = "r0-0-" + new Revision(rev).getClusterId();
                if (perCluster === undefined) {
                    perCluster = {};
                }
                perCluster[key] = (perCluster[key] === undefined) ? 1 : perCluster[key] + 1;
            }
            if (perCluster !== undefined) {
                countsByProperty[prop] = perCluster;
            }
        }
        // pretty format the output using stringify
        print(JSON.stringify(countsByProperty, null, 8));
    };

    /**
     * Prints commit value of all branch commits.
     *
     * @memberof oak
     * @method branchCommitValues
     * @param {string} path the path of a document
     */
    api.branchCommitValues = function(path) {
        if (path === undefined) {
            print("No path specified");
            return;
        }
        print("loading document at " + path);
        var doc = this.findOne(path);
        if (!doc) {
            print("No document for path: " + path);
            return;
        }
        var uncommitted = 0;
        // the root document is read once and handed to every getCommitValue call
        var rootDoc = this.findOne("/");
        for (var rev in doc._bc) {
            var commitValue = this.getCommitValue("/", rev, rootDoc);
            // a commit value starting with "c-" is reported as committed
            var committed = commitValue && commitValue[rev] && commitValue[rev].startsWith("c-");
            if (committed) {
                print(" - branch change " + rev + " is committed");
            } else {
                print(" - branch change " + rev + " is not or not yet committed");
                uncommitted += 1;
            }
        }
        print("Number of unmerged branch changes : " + uncommitted);
    };

    /**
     * Approximative calculation of the size of the
     * object passed. Typically that object is a bson,
     * but over time more cases should be supported.
     *
     * @memberof oak
     * @method sizeOf
     * @param {object} obj the object, eg bson, string,
     * for which to calculate the size, approximatively
     */
    api.sizeOf = function(obj) {
        // dispatch on the [[Class]]-style tag of the value
        var tag = Object.prototype.toString.call(obj);
        switch (tag) {
            case "[object BSON]":
                return Object.bsonsize(obj);
            case "[object Null]":
                return 0;
            case "[object String]":
                return obj.length;
            default:
                // unsupported type: report it and fall back to a fixed value
                print("sizeOf: obj not a bson but : " + tag);
                return 42;
        }
    };

    /**
     * Prints the commit value of all branch commits
     * of the provided path and property.
     * Plus also prints a stats summary at the end.
     * Useful to determine details of potential garbage.
     *
     * @memberof oak
     * @method unmergedBranchStatsForProperty
     * @param {string} path the path of a document
     * @param {string} property the property to inspect
     */
    api.unmergedBranchStatsForProperty = function(path, property) {
        if (path === undefined) {
            print("No path specified");
            return;
        }
        print("loading document at " + path);
        var doc = this.findOne(path);
        if (!doc) {
            print("No document for path: " + path);
            return;
        }
        // The document may not carry the requested property at all. No branch
        // change can affect it then, so every unmerged change is reported as
        // "not affected" instead of failing on a lookup in undefined.
        var subdoc = doc[property];
        var hasProperty = subdoc !== undefined && subdoc !== null;
        var num = 0;
        var totalGarbage = 0;
        // the root document is read once and handed to every getCommitValue call
        var cachedRoot = this.findOne("/");
        // accumulated size of unmerged values, keyed by clusterId
        var propertyGarbageSizes = {};
        var rev;
        for (rev in doc._bc) {
            var commitValue = this.getCommitValue("/", rev, cachedRoot);
            // a commit value starting with "c-" is reported as committed
            if (commitValue && commitValue[rev] && commitValue[rev].startsWith("c-")) {
                print(" - branch change " + rev + " is committed");
                continue;
            }
            var propertyValue = hasProperty ? subdoc[rev] : undefined;
            if (propertyValue === undefined) {
                print(" - branch change " + rev + " is unmerged but property " + property + " not affected");
                continue;
            }
            var garbageSize = this.sizeOf(propertyValue);
            var clusterId = new Revision(rev).getClusterId();
            print(" - branch change " + rev + " (clusterId " + clusterId + ") is unmerged of size " + garbageSize);
            num++;
            totalGarbage += garbageSize;
            propertyGarbageSizes[clusterId] = (propertyGarbageSizes[clusterId] || 0) + garbageSize;
        }
        print("Number of unmerged branch changes : " + num + " of total size " + totalGarbage);
        print("propertyGarbageSizes :");
        print(JSON.stringify(propertyGarbageSizes, null, 2));
    };

    /**
     * Removes unmerged branch changes on the document with the given path
     * and clusterId. This method will only remove unmerged branch changes when
     * the clusterId is inactive.
     * On big documents with write contention it is advisable to limit the
     * number of unmerged branch changes to remove in one go. Otherwise MongoDB
     * may have difficulties applying the change to the document.
     *
     * @memberof oak
     * @method removeUnmergedBranchChanges
     * @param {string} path the path of a document
     * @param {number} clusterId unmerged branch changes for this clusterId will be removed.
     * @param {number} [limit=1000000] maximum number of unmerged branches to remove.
     * @returns {object} the result of the MongoDB update.
     */
    api.removeUnmergedBranchChanges = function(path, clusterId, limit) {
        if (path === undefined) {
            print("No path specified");
            return;
        }
        if (clusterId === undefined) {
            print("No clusterId specified");
            return;
        }
        var maxChanges = (limit === undefined) ? 1000000 : limit;
        // never touch entries of a cluster node that is still flagged ACTIVE
        var owner = db.clusterNodes.findOne({_id: clusterId.toString()});
        if (owner && owner.state == "ACTIVE") {
            print("Cluster node with id " + clusterId + " is active!");
            print("Can only remove unmerged branches for inactive cluster node.");
            return;
        }

        var doc = this.findOne(path);
        if (!doc) {
            print("No document for path: " + path);
            return;
        }
        var toUnset = {};
        var selected = 0;
        for (var rev in doc._bc) {
            if (new Revision(rev).getClusterId() != clusterId) {
                continue;
            }

            // a commit value starting with "c-" means the change must be kept
            var commitValue = this.getCommitValue("/", rev);
            var committed = commitValue && commitValue[rev] && commitValue[rev].startsWith("c-");
            if (committed) {
                print("Branch change " + rev + " is not garbage");
                continue;
            }

            // unset the revision on every property of the document that has it
            for (var key in doc) {
                if (doc.hasOwnProperty(key) && doc[key][rev]) {
                    toUnset[key + "." + rev] = "";
                }
            }
            selected += 1;

            if (selected >= maxChanges) {
                break;
            }
        }
        if (selected === 0) {
            print("No unmerged branches found for clusterId " + clusterId);
            return;
        }
        var update = {
            "$inc": {_modCount: NumberLong(1)},
            "$unset": toUnset
        };
        print("Removing " + selected + " unmerged branches for clusterId " + clusterId);
        return db.nodes.update({_id: pathDepth(path) + ":" + path}, update);
    };

    /**
     * Finds the document with the given path.
     *
     * @memberof oak
     * @method findOne
     * @param {string} path the path of the document.
     * @param {boolean} [longPaths=false] if true, it will extend the search
     *        to look for long paths.
     * @returns {object} the document or null if it doesn't exist.
     */
    api.findOne = function(path, longPaths) {
        if (path === undefined) {
            return null;
        }
        var depth = pathDepth(path);
        // regular lookup by "<depth>:<path>" unless long paths were requested
        var regularLookup = (longPaths === undefined || longPaths === false);
        var query = regularLookup ? {_id: depth + ":" + path} : longPathFilter(depth, path);
        return db.nodes.findOne(query);
    };

    /**
     * Checks the history of previous documents at the given path. Orphaned
     * references to removed previous documents are counted and listed when
     * run with verbose set to true.
     *
     * @memberof oak
     * @method checkHistory
     * @param {string} path the path of the document.
     * @param {boolean} [verbose=false] if true, the result object will contain a list
     *        of dangling references to previous documents.
     * @param {boolean} [ignorePathLen=false] whether to ignore a long path and
     *        still try to read it from MongoDB.
     * @returns {object} the result of the check.
     */
    api.checkHistory = function(path, verbose, ignorePathLen) {
        // check only: never modify any document
        var fix = false;
        return checkOrFixHistory(path, fix, verbose, ignorePathLen);
    };

    /**
     * Lists the descendant documents at a given path.
     *
     * @memberof oak
     * @method listDescendants
     * @param {string} path list the descendants of the document with this path.
     */
    api.listDescendants = function(path) {
        // nothing to list without a path
        if (path === undefined) {
            return null;
        }
        print("Listing descendants for "+path);
        var found = 0;
        // print the path of every document handed to the callback and count it
        var printAndCount = function(descendant) {
            print(api.pathFromId(descendant._id));
            found += 1;
        };
        this.forEachChild(path, printAndCount);
        print("Found " + found + " descendants");
    };

    /**
     * Lists the children at a given path.
     *
     * @memberof oak
     * @method listChildren
     * @param {string} path list the children of the document with this path.
     */
    api.listChildren = function(path) {
        // nothing to list without a path
        if (path === undefined) {
            return null;
        }
        print("Listing children for "+path);
        // the root path already ends with a slash, every other path needs one
        var childPrefix = (path == "/") ? path : path + "/";
        // ids of children start with "<depth + 1>:<childPrefix>"
        var idFilter = pathFilter(pathDepth(path) + 1, childPrefix);
        var total = 0;
        var cursor = db.nodes.find({_id: idFilter}, {_id: 1});
        cursor.forEach(function(child) {
            print(api.pathFromId(child._id));
            total++;
        });
        print("Found " + total + " children");
    };

    /**
     * Same as checkHistory except it goes through ALL descendants as well!
     *
     * @memberof oak
     * @method checkDeepHistory
     * @param {string} path the path of the document.
     * @param {boolean} [verbose=false] if true, the result object will contain a list
     *        of dangling references to previous documents.
     */
    api.checkDeepHistory = function(path, verbose) {
        // read-only pass: neither repair documents nor emit fixHistory() calls
        var fix = false;
        var prepare = false;
        checkOrFixDeepHistory(path, fix, prepare, verbose);
    };

    /**
     * Preparation step which scans through all descendants and prints out
     * 'fixHistory' for those that need fixing of their 'dangling references'.
     * <p>
     * See fixHistory for parameter details.
     * <p>
     * Run this command via something as follows:
     * <p>
     *  mongo &lt;DBNAME> -eval "load('oak-mongo.js'); oak.prepareDeepHistory('/');" > fix.js
     *
     * @memberof oak
     * @method prepareDeepHistory
     * @param {string} path the path of a document.
     * @param {boolean} [verbose=false] if true, the result object will contain a list
     *        of dangling references to previous documents.
     */
    api.prepareDeepHistory = function(path, verbose) {
        // prepare mode: documents stay untouched, only oak.fixHistory(...) calls are printed
        var fix = false;
        var prepare = true;
        checkOrFixDeepHistory(path, fix, prepare, verbose);
    };

    /**
     * Same as fixHistory except it goes through ALL descendants as well!
     *
     * @memberof oak
     * @method fixDeepHistory
     * @param {string} path the path of the document.
     * @param {boolean} [verbose=false] if true, the result object will contain a list
     *        of removed references to previous documents.
     */
    api.fixDeepHistory = function(path, verbose) {
        // repair mode: dangling references are cleaned up on every visited document
        var fix = true;
        var prepare = false;
        checkOrFixDeepHistory(path, fix, prepare, verbose);
    };

    /**
     * Repairs the history of previous documents at the given path. Orphaned
     * references to removed previous documents are cleaned up and listed when
     * run with verbose set to true.
     *
     * @memberof oak
     * @method fixHistory
     * @param {string} path the path of the document.
     * @param {boolean} [verbose=false] if true, the result object will contain a list
     *        of removed references to previous documents.
     * @returns {object} the result of the fix.
     */
    api.fixHistory = function(path, verbose) {
        // repair mode; the path length limit is always bypassed here
        var fix = true;
        var ignorePathLen = true;
        return checkOrFixHistory(path, fix, verbose, ignorePathLen);
    };

    /**
     * Returns the commit value entry for the change with the given revision.
     *
     * @memberof oak
     * @method getCommitValue
     * @param {string} path the path of a document.
     * @param {string} revision the revision of a change on the document.
     * @returns {object} the commit entry for the given revision or null if
     *          there is none.
     */
    api.getCommitValue = function(path, revision) {
        var doc = this.findOne(path);
        if (!doc) {
            return null;
        }
        if (revision === undefined) {
            // Without a revision there is nothing to resolve; continuing would
            // fail while parsing the revision in findPreviousDocument().
            print("No revision specified");
            return null;
        }
        // check _revisions on the document and on its previous documents
        var entry = getRevisionEntry(doc, path, revision);
        if (entry) {
            return entry;
        }

        // otherwise look for a _commitRoot entry, again including previous documents
        entry = getEntry(doc, "_commitRoot", revision);
        if (!entry) {
            var prev = findPreviousDocument(path, "_commitRoot", revision);
            if (prev) {
                entry = getEntry(prev, "_commitRoot", revision);
            }
        }
        if (!entry) {
            return null;
        }
        // the _commitRoot value is passed on as the depth of the commit root
        // within the path; always parse it as a decimal number
        var commitRootPath = getCommitRootPath(path, parseInt(entry[revision], 10));
        doc = this.findOne(commitRootPath);
        if (!doc) {
            return null;
        }
        // normalize "not found" to null as documented
        return getRevisionEntry(doc, commitRootPath, revision) || null;
    };
    
    /**
     * Prints mongoexport command to export all documents related to given path.
     * Related documents refer to all documents in the hierarchy and their split documents.
     * e.g.
     * > oak.printMongoExportCommand("/etc", {db: "aem-author"})
     *
     * @memberof oak
     * @method printMongoExportCommand
     * @param {string} path the path of the document.
     * @param {object} options pass optional parameters for host, port, db, and filename 
     * @returns {string} command line which can be used to export documents using mongoexport
     */

    api.printMongoExportCommand = function (path, options) {
        // query selecting the documents of the path's hierarchy and their split documents
        var hierarchyQuery = getDocAndHierarchyQuery(path);
        var queryJson = JSON.stringify(hierarchyQuery);
        return createExportCommand(queryJson, options);
    };

    /**
     * Prints mongoexport command to export oplog entries around time represented by revision.
     * e.g.
     * > oak.printOplogSliceCommand("r14e64620028-0-1", {db: "aem-author"})
     * Note: this assumes that the clock on the mongo instance is synchronized with the clock on the oak instance.
     * If that's not the case, adjust revStr to account for the difference.
     *
     * @memberof oak
     * @method printOplogSliceCommand
     * @param {string} revStr revision string around which oplog is to be exported.
     * @param {object} options pass optional parameters for host, port, db, filename, oplogTimeBuffer
     * @returns {string} command line which can be used to export oplog entries using mongoexport
     */

    api.printOplogSliceCommand = function (revStr, options) {
        var opts = options || {};
        var host = opts.host || "127.0.0.1";
        var port = opts.port || "27017";
        var db = opts.db || "oak";
        var filename = opts.filename || "oplog.json";
        var oplogTimeBuffer = opts.oplogTimeBuffer || 10;

        // time window in seconds around the revision timestamp
        var revTimeInSec = new Revision(revStr).asDate().getTime()/1000;
        var windowStart = Math.floor(revTimeInSec - oplogTimeBuffer);
        var windowEnd = Math.ceil(revTimeInSec + oplogTimeBuffer);

        // oplog entries for the nodes collection of the given db within the window
        var query = '{"ns" : "' + db + '.nodes", "ts": {'
                + '"$gte": Timestamp(' + windowStart + ', 1), '
                + '"$lte": Timestamp(' + windowEnd + ', 1)}}';

        var commandParts = [
            "mongoexport",
            "--host " + host,
            "--port " + port,
            "--db local",
            "--collection oplog.rs",
            "--out " + filename,
            "--query '" + query + "'"
        ];
        return commandParts.join(" ");
    };

    //~--------------------------------------------------< internal >

    /*
     * Builds the mongoexport command line for the nodes collection using the
     * given query string. host, port, db and filename are taken from options
     * when set and fall back to defaults otherwise.
     */
    var createExportCommand = function (query, options) {
        var opts = options || {};
        var commandParts = [
            "mongoexport",
            "--host " + (opts.host || "127.0.0.1"),
            "--port " + (opts.port || "27017"),
            "--db " + (opts.db || "oak"),
            "--collection nodes",
            "--out " + (opts.filename || "all-required-nodes.json"),
            "--query '" + query + "'"
        ];
        return commandParts.join(" ");
    };

    /*
     * Runs checkOrFixHistory for every document that api.forEachChild hands to
     * the callback for the given path and prints a report. In prepare mode
     * only "oak.fixHistory('<path>');" lines are printed for affected documents.
     */
    var checkOrFixDeepHistory = function(path, fix, prepare, verbose) {
        // prepare mode prints no header
        if (!prepare) {
            print((fix ? "Fixing   " : "Checking ") + path + " plus all descendants...");
        }
        var stats = {handled: 0, affected: 0, ignored: 0};

        api.forEachChild(path, function(child) {
            var childPath = api.pathFromId(child._id);
            var outcome = checkOrFixHistory(childPath, fix, verbose, true);
            if (!outcome) {
                // checkOrFixHistory returned no result for this document
                if (!prepare) {
                    print(" - could not handle "+childPath);
                }
                stats.ignored++;
                return;
            }

            // only a real fix run reports removed links, all other modes report dangling ones
            var numDangling = (!prepare && fix)
                    ? outcome.numPrevLinksRemoved
                    : outcome.numPrevLinksDangling;
            if (numDangling != 0) {
                if (prepare) {
                    print("oak.fixHistory('"+childPath+"');");
                } else if (fix) {
                    print(" - path: "+childPath+" removed "+numDangling+" dangling previous revisions");
                } else {
                    print(" - path: "+childPath+" has "+numDangling+" dangling previous revisions");
                }
                stats.affected++;
            }

            // progress is only counted and reported outside of prepare mode
            if (!prepare) {
                stats.handled++;
                if (stats.handled % 10000 == 0) {
                    print("[checked "+stats.handled+" so far ("+stats.affected+" affected, "+stats.ignored+" ignored) ...]");
                }
            }
        });

        if (!prepare) {
            print("Total: "+stats.handled+" handled, "+stats.affected+" affected, "+stats.ignored+" ignored (path too long)");
            print("done.");
        }
    };

    /*
     * Returns the _revisions entry for the given revision, looking first at
     * the passed document and then at the previous document located via
     * findPreviousDocument(). Returns null (never undefined) when there is no
     * such entry, consistent with getEntry().
     */
    var getRevisionEntry = function (doc, path, revision) {
        var entry = getEntry(doc, "_revisions", revision);
        if (entry) {
            return entry;
        }
        var prev = findPreviousDocument(path, "_revisions", revision);
        if (!prev) {
            return null;
        }
        return getEntry(prev, "_revisions", revision) || null;
    };
    
    /*
     * Returns the ancestor-or-self of the given path that has the given depth,
     * i.e. the first 'depth' path elements. Depth 0 is the root. When the path
     * has fewer elements than requested the whole path is returned.
     */
    var getCommitRootPath = function(path, depth) {
        if (depth == 0) {
            return "/";
        }
        // advance to the slash that follows the depth-th path element
        var end = 0;
        for (var remaining = depth; remaining > 0 && end != -1; remaining--) {
            end = path.indexOf("/", end + 1);
        }
        return path.substring(0, end == -1 ? path.length : end);
    };
    
    /*
     * Returns an object holding just doc[name][revision] keyed by the
     * revision, or null when the document, the named map or the (truthy)
     * value is missing.
     */
    var getEntry = function(doc, name, revision) {
        var hasValue = doc && doc[name] && doc[name][revision];
        if (!hasValue) {
            return null;
        }
        var entry = {};
        entry[revision] = doc[name][revision];
        return entry;
    };
    
    /*
     * Walks the previous documents referenced from the document at the given
     * path and returns one that has an entry for the revision in the map with
     * the given name. Returns null when the main document or a matching
     * previous document does not exist, and undefined when no path was given
     * or the path is longer than 165 characters.
     */
    var findPreviousDocument = function(path, name, revision) {
        var target = new Revision(revision);
        if (path === undefined) {
            print("No path specified");
            return;
        }
        if (path.length > 165) {
            print("Path too long");
            return;
        }
        var mainDoc = api.findOne(path);
        if (!mainDoc) {
            return null;
        }

        var found = null;
        var visit = function(d, high, low, height) {
            var upper = new Revision(high);
            var lower = new Revision(low);
            // only follow ranges of the same cluster node that can contain the revision
            var inRange = upper.getClusterId() == target.getClusterId()
                    && !lower.isNewerThan(target)
                    && !target.isNewerThan(upper);
            if (!inRange) {
                return;
            }

            var prev = db.nodes.findOne({_id: prevDocIdFor(path, high, height)});
            if (!prev) {
                return;
            }
            if (prev[name] && prev[name][revision]) {
                found = prev;
            } else {
                // no entry here, descend into the references of this previous document
                forEachPrev(prev, visit);
            }
        };
        forEachPrev(mainDoc, visit);
        return found;
    };

    /*
     * Checks (fix = false) or repairs (fix = true) the references to previous
     * documents of the document at the given path. Counts existing previous
     * documents and references to missing ones; with verbose the result also
     * lists them as "<high revision>/<height>". Returns undefined when no path
     * is given or the path is too long, and null when the document is missing.
     */
    var checkOrFixHistory = function(path, fix, verbose, ignorePathLen) {
        if (path === undefined) {
            print("No path specified");
            return;
        }
        if (!ignorePathLen && (path.length > 165)) {
            print("Path too long");
            return;
        }

        var doc = api.findOne(path);
        if (!doc) {
            return null;
        }

        // result fields used for references to missing previous documents
        var missingListKey = fix ? "prevLinksRemoved" : "prevLinksDangling";
        var missingCountKey = fix ? "numPrevLinksRemoved" : "numPrevLinksDangling";

        var result = {};
        result._id = pathDepth(path) + ":" + path;
        if (verbose) {
            result.prevDocs = [];
            result[missingListKey] = [];
        }
        result.numPrevDocs = 0;
        result[missingCountKey] = 0;

        var visit = function(d, high, low, height) {
            var reference = high + "/" + height;
            var prev = db.nodes.findOne({_id: prevDocIdFor(path, high, height)});
            if (prev) {
                if (result.prevDocs) {
                    result.prevDocs.push(reference);
                }
                result.numPrevDocs++;
                // a height above zero means the previous document has references of its own
                if (parseInt(height) > 0) {
                    forEachPrev(prev, visit);
                }
                return;
            }

            // the referenced previous document does not exist
            if (result[missingListKey]) {
                result[missingListKey].push(reference);
            }
            result[missingCountKey]++;
            if (!fix) {
                return;
            }

            var update = {};
            update.$inc = {_modCount : NumberLong(1)};
            if (d._sdType == 40) { // intermediate split doc type
                // drop the reference from the intermediate document
                update.$unset = {};
                update.$unset["_prev." + high] = 1;
            } else {
                // otherwise record the reference as stale
                update.$set = {};
                update.$set["_stalePrev." + high] = height;
            }
            db.nodes.update({_id: d._id}, update);
        };
        forEachPrev(doc, visit);
        return result;
    };

    /*
     * Invokes callable(doc, high, low, height) for every entry of doc._prev.
     * The key is the high revision and the value has the form "<low>/<height>".
     * Entries whose height equals the value recorded for the same key in
     * doc._stalePrev are skipped.
     */
    var forEachPrev = function(doc, callable) {
        var stale = doc._stalePrev || {};
        for (var high in doc._prev) {
            var range = doc._prev[high];
            var separator = range.lastIndexOf("/");
            var height = range.substring(separator + 1);
            // loose comparison on purpose: the stale value may not be a string
            if (stale[high] == height) {
                continue;
            }
            callable.call(this, doc, high, range.substring(0, separator), height);
        }
    };

    /*
     * Walks from the given path up to the root and compares the _lastRev entry
     * of the given clusterId (default 1) on each document with the most recent
     * one seen further down. A document whose _lastRev is older is reported
     * (dryRun) or updated to that revision. Documents that do not exist, have
     * no _lastRev sub-document or no entry for the clusterId are skipped.
     *
     * Returns an array with one entry per inspected document, the string
     * "Not a valid absolute path" for a non-absolute path, or undefined when
     * no path is given.
     */
    var checkOrFixLastRevs = function(path, clusterId, dryRun) {
        if (path === undefined) {
            print("Need at least a path from where to start check/fix.");
            return;
        }
        if (path.length == 0 || path.charAt(0) != '/') {
            return "Not a valid absolute path";
        }
        if (clusterId === undefined) {
            clusterId = 1;
        }
        var lastRevKey = "r0-0-" + clusterId;
        var result = [];
        var lastRev;
        while (true) {
            var doc = db.nodes.findOne({_id: pathDepth(path) + ":" + path});
            // guard against documents without a _lastRev sub-document;
            // accessing doc._lastRev[...] on them would throw a TypeError
            var revStr = (doc && doc._lastRev) ? doc._lastRev[lastRevKey] : undefined;
            if (revStr) {
                var rev = new Revision(revStr);
                if (lastRev && lastRev.isNewerThan(rev)) {
                    if (dryRun) {
                        result.push({_id: doc._id, _lastRev: rev.toString(), needsFix: lastRev.toString()});
                    } else {
                        var update = {$set:{}};
                        update.$set["_lastRev." + lastRevKey] = lastRev.toString();
                        db.nodes.update({_id: doc._id}, update);
                        result.push({_id: doc._id, _lastRev: rev.toString(), fixed: lastRev.toString()});
                    }
                } else {
                    result.push({_id: doc._id, _lastRev: rev.toString()});
                    lastRev = rev;
                }
            }
            if (path == "/") {
                break;
            }
            // continue with the parent path
            var idx = path.lastIndexOf("/");
            if (idx == 0) {
                path = "/";
            } else {
                path = path.substring(0, idx);
            }
        }
        return result;
    };

    /*
     * Parsed form of a revision string such as "r16d63f52ff7-0-1": a one
     * character prefix followed by timestamp, counter and clusterId, each in
     * hex and separated by dashes.
     */
    var Revision = function(rev) {
        var firstDash = rev.indexOf("-");
        var secondDash = rev.indexOf("-", firstDash + 1);
        var lastDash = rev.lastIndexOf("-");
        this.rev = rev;
        // skip the leading prefix character
        this.timestamp = parseInt(rev.substring(1, firstDash), 16);
        this.counter = parseInt(rev.substring(firstDash + 1, secondDash), 16);
        this.clusterId = parseInt(rev.substring(lastDash + 1), 16);
    };

    // Returns the original revision string.
    Revision.prototype.toString = function () {
        return this.rev;
    };

    // Orders by timestamp first; the counter breaks ties.
    Revision.prototype.isNewerThan = function(other) {
        if (this.timestamp > other.timestamp) {
            return true;
        }
        if (this.timestamp < other.timestamp) {
            return false;
        }
        return this.counter > other.counter;
    };

    // Returns the revision string followed by its timestamp as a date in parentheses.
    Revision.prototype.toReadableString = function () {
        var when = this.asDate().toString();
        return this.rev + " (" + when + ")";
    };

    // Interprets the timestamp as the argument of the Date constructor.
    Revision.prototype.asDate = function() {
        return new Date(this.timestamp);
    };

    Revision.prototype.getClusterId = function() {
        return this.clusterId;
    };

    /*
     * Returns the depth of a path: 0 for the root, otherwise the number of
     * slashes in the path.
     */
    var pathDepth = function(path){
        if (path === '/') {
            return 0;
        }
        // splitting on the slash yields one more piece than there are slashes
        return path.split('/').length - 1;
    };
    
    /*
     * Builds the id of a previous document of the given path:
     * "<depth + 2>:p<path>/<high>/<height>".
     */
    var prevDocIdFor = function(path, high, height) {
        var base = "p" + path;
        // avoid a double slash when the path already ends with one (root)
        var separator = base.charAt(base.length - 1) != "/" ? "/" : "";
        return (pathDepth(path) + 2) + ":" + base + separator + high + "/" + height;
    };

    // Returns a RegExp matching ids that start with "<depth>:<prefix>" (prefix taken literally).
    var pathFilter = function (depth, prefix){
        var pattern = "^" + depth + ":";
        pattern += escapeForRegExp(prefix);
        return new RegExp(pattern);
    };

    /*
     * Returns a query filter whose _id must start with "<depth>:h" and whose
     * _path must start with the given prefix (taken literally).
     */
    var longPathFilter = function (depth, prefix) {
        return {
            _id: new RegExp("^" + depth + ":h"),
            _path: new RegExp("^" + escapeForRegExp(prefix))
        };
    };

    /*
     * Returns a query whose _id must start with "<depth of path>:h" and whose
     * _path must equal the given path.
     */
    var longPathQuery = function (path) {
        var idPattern = "^" + pathDepth(path) + ":h";
        return {
            _id: new RegExp(idPattern),
            _path: path
        };
    };

    //http://stackoverflow.com/a/20732091/1035417
    /*
     * Formats a size in bytes using 1024-based units (B, kB, MB, GB, TB) with
     * at most two decimals, e.g. 1536 -> "1.5 kB".
     *
     * The unit is found by repeated division instead of a logarithm, so that
     * 0 and values below 1 are reported in bytes, values of 1024 TB and more
     * stay in TB (instead of an undefined unit), and exact powers of 1024
     * cannot be assigned to the wrong unit due to floating point rounding.
     */
    var humanFileSize = function (size) {
        var units = ['B', 'kB', 'MB', 'GB', 'TB'];
        var value = Number(size);
        var unit = 0;
        while (value >= 1024 && unit < units.length - 1) {
            value /= 1024;
            unit++;
        }
        // "* 1" strips trailing zeros produced by toFixed
        return value.toFixed(2) * 1 + ' ' + units[unit];
    };
    
    // http://stackoverflow.com/questions/3561493/is-there-a-regexp-escape-function-in-javascript
    // Prefixes every character that is special in a regular expression (and the slash) with a backslash.
    var escapeForRegExp = function(s) {
        var specialChars = /[-\/\\^$*+?.()|[\]{}]/g;
        return s.replace(specialChars, function(ch) {
            return "\\" + ch;
        });
    };

    /*
     * Builds a query matching the document at the given path, all of its
     * ancestors and the previous (split) documents of each of them.
     *
     * The path is escaped before it is used in $regex so that characters such
     * as '.', '(' or '+' in node names are matched literally instead of being
     * interpreted as regular expression syntax.
     */
    var getDocAndHierarchyQuery = function (path) {
        var ids = [];
        var clauses = [];
        getHierarchyPaths(path).forEach(function (hierarchyPath) {
            var depth = pathDepth(hierarchyPath);
            ids.push(depth + ':' + hierarchyPath);

            // ids of split documents start with "<depth + 2>:p<path>/"; the
            // root path already ends with a slash
            var splitDocPrefix = 'p' + hierarchyPath + (depth == 0 ? '' : '/');
            var splitDocRegex = '^' + (depth + 2) + ':' + escapeForRegExp(splitDocPrefix);

            clauses.push({_id : {$regex : splitDocRegex}});
        });

        clauses.push({_id : {$in : ids}});

        return {$or : clauses};
    };

    /*
     * Returns the root path followed by every ancestor of the given path and
     * the path itself, e.g. "/a/b" -> ["/", "/a", "/a/b"].
     */
    var getHierarchyPaths = function (path) {
        var paths = ["/"];
        var current = "";
        var segments = path.split("/");
        for (var i = 0; i < segments.length; i++) {
            // splitting yields empty segments for leading, trailing or doubled slashes
            if (segments[i] == "") {
                continue;
            }
            current += "/" + segments[i];
            paths.push(current);
        }
        return paths;
    };

    return api;
}(this));