<%
// @author Dawid Weiss
//
// Clusters the current page of search hits and renders, for each of the
// top clusters, a comma-separated list of its description labels.
//
// Relies on names defined by the including page: clusterer, details,
// summaries, bean and the implicit JSP writer "out".
//
// PERFORMANCE/USER INTERFACE NOTE:
//
// What I do here is merely a demonstration. In real life the clustering
// process should be done in a separate "processing" stream, most likely
// a separate HTML frame that the user's browser requests data from.
// We don't want the user to wait with plain snippets until the clusters
// are created.
//
// Also: clustering is resource consuming, so a cache of recent queries
// would be in place. Besides, such a cache would also be beneficial for
// the purpose of re-querying existing clusters (remember that the
// clustering extension may be a heuristic returning a DIFFERENT set of
// clusters for an identical input).
//
// See www.vivisimo.com for details of how this can be done using frames, or
// http://carrot.cs.put.poznan.pl for an example of a Javascript solution.

// cluster the hits
HitsCluster[] clusters = null;
if (clusterer != null) {
  final long clusteringStart = System.currentTimeMillis();
  try {
    clusters = clusterer.clusterHits(details, Summary.toStrings(summaries));
    final long clusteringDuration = System.currentTimeMillis() - clusteringStart;
    bean.LOG.info("Clustering took: " + clusteringDuration + " milliseconds.");
  } catch (Exception e) {
    // Clustering is best-effort: the page still renders and reports
    // "Unable to do clustering." below because clusters stays null.
    // Record the cause instead of dropping it silently.
    bean.LOG.info("Clustering failed: " + e);
  }
}

if (clusterer == null) {
  %>No clustering extension found.<%
} else if (clusters == null) {
  %>Unable to do clustering.<%
} else if (clusters.length == 0) {
  %>No clusters found.<%
} else {
  // display top N clusters and top Q documents inside them.
  int N = 10;
  int Q = 3;
  int maxLabels = 2;

  // NOTE(review): displayCounter and Q are not used by the code visible
  // here; they are kept in case stripped markup relied on them -- confirm.
  int displayCounter = 0;
  N = Math.min(N, clusters.length);

  for (int clusterIndex = 0; clusterIndex < N; clusterIndex++) {
    HitsCluster cluster = clusters[clusterIndex];
    String[] clusterLabels = cluster.getDescriptionLabels();

    // probably leave it on for now
    //if (cluster.isJunkCluster()) continue;

    // output cluster label.
    %>
<%
    // Print at most maxLabels labels, separated by ", ".
    for (int k = 0; k < maxLabels && k < clusterLabels.length; k++) {
      if (k > 0) {
        out.print(", ");
      }
      // Encode the label before writing it into the page.
      out.print(Entities.encode(clusterLabels[k]));
    }
    %>
<%
    // now output sample documents from the inside
    HitDetails[] documents = cluster.getHits();
    if (documents.length > 0) {
      // NOTE(review): nothing is emitted here; the markup listing the
      // sample documents appears to be missing from this file -- confirm.
      %><%
    }

    // ignore subclusters for now, ALTHOUGH HIERARCHICAL CLUSTERING
    // METHODS DO EXIST AND ARE VERY USEFUL
    // HitsCluster [] subclusters = cluster.getSubclusters();
  }
}
%>