| 1 |
szetszwo |
790015 |
/** |
| 2 |
|
|
* Licensed to the Apache Software Foundation (ASF) under one |
| 3 |
|
|
* or more contributor license agreements. See the NOTICE file |
| 4 |
|
|
* distributed with this work for additional information |
| 5 |
|
|
* regarding copyright ownership. The ASF licenses this file |
| 6 |
|
|
* to you under the Apache License, Version 2.0 (the |
| 7 |
|
|
* "License"); you may not use this file except in compliance |
| 8 |
|
|
* with the License. You may obtain a copy of the License at |
| 9 |
|
|
* |
| 10 |
|
|
* http://www.apache.org/licenses/LICENSE-2.0 |
| 11 |
|
|
* |
| 12 |
|
|
* Unless required by applicable law or agreed to in writing, software |
| 13 |
|
|
* distributed under the License is distributed on an "AS IS" BASIS, |
| 14 |
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 15 |
|
|
* See the License for the specific language governing permissions and |
| 16 |
|
|
* limitations under the License. |
| 17 |
|
|
*/ |
| 18 |
|
|
package org.apache.hadoop.examples.pi; |
| 19 |
|
|
|
| 20 |
|
|
import java.util.ArrayList; |
| 21 |
|
|
import java.util.Collections; |
| 22 |
|
|
import java.util.List; |
| 23 |
|
|
import java.util.Map; |
| 24 |
|
|
|
| 25 |
|
|
import org.apache.hadoop.conf.Configured; |
| 26 |
|
|
import org.apache.hadoop.examples.pi.DistSum.Computation; |
| 27 |
|
|
import org.apache.hadoop.examples.pi.DistSum.Parameters; |
| 28 |
|
|
import org.apache.hadoop.examples.pi.math.Bellard; |
| 29 |
|
|
import org.apache.hadoop.examples.pi.math.Summation; |
| 30 |
|
|
import org.apache.hadoop.examples.pi.math.Bellard.Parameter; |
| 31 |
|
|
import org.apache.hadoop.util.Tool; |
| 32 |
|
|
import org.apache.hadoop.util.ToolRunner; |
| 33 |
|
|
|
| 34 |
|
|
/** |
| 35 |
szetszwo |
793136 |
* A map/reduce program that uses a BBP-type method to compute exact |
| 36 |
|
|
* binary digits of Pi. |
| 37 |
|
|
* This program is designed for computing the n th bit of Pi, |
| 38 |
|
|
* for large n, say n >= 10^8. |
| 39 |
|
|
* For computing lower bits of Pi, consider using bbp. |
| 40 |
|
|
* |
| 41 |
|
|
* The actually computation is done by DistSum jobs. |
| 42 |
|
|
* The steps for launching the jobs are: |
| 43 |
szetszwo |
790015 |
* |
| 44 |
|
|
* (1) Initialize parameters. |
| 45 |
|
|
* (2) Create a list of sums. |
| 46 |
|
|
* (3) Read computed values from the given local directory. |
| 47 |
|
|
* (4) Remove the computed values from the sums. |
| 48 |
|
|
* (5) Partition the remaining sums into computation jobs. |
| 49 |
szetszwo |
793136 |
* (6) Submit the computation jobs to a cluster and then wait for the results. |
| 50 |
szetszwo |
790015 |
* (7) Write job outputs to the given local directory. |
| 51 |
|
|
* (8) Combine the job outputs and print the Pi bits. |
| 52 |
|
|
*/ |
| 53 |
|
|
/* |
| 54 |
|
|
* The command line format is: |
| 55 |
|
|
* > hadoop org.apache.hadoop.examples.pi.DistBbp \ |
| 56 |
|
|
* <b> <nThreads> <nJobs> <type> <nPart> <remoteDir> <localDir> |
| 57 |
|
|
* |
| 58 |
|
|
* And the parameters are: |
| 59 |
|
|
* <b> The number of bits to skip, i.e. compute the (b+1)th position. |
| 60 |
|
|
* <nThreads> The number of working threads. |
| 61 |
|
|
* <nJobs> The number of jobs per sum. |
| 62 |
|
|
* <type> 'm' for map side job, 'r' for reduce side job, 'x' for mix type. |
| 63 |
|
|
* <nPart> The number of parts per job. |
| 64 |
|
|
* <remoteDir> Remote directory for submitting jobs. |
| 65 |
|
|
* <localDir> Local directory for storing output files. |
| 66 |
|
|
* |
| 67 |
|
|
* Note that it may take a long time to finish all the jobs when <b> is large. |
| 68 |
|
|
* If the program is killed in the middle of the execution, the same command with |
| 69 |
|
|
* a different <remoteDir> can be used to resume the execution. For example, suppose |
| 70 |
|
|
* we use the following command to compute the (10^15+57)th bit of Pi. |
| 71 |
|
|
* |
| 72 |
|
|
* > hadoop org.apache.hadoop.examples.pi.DistBbp \ |
| 73 |
|
|
* 1,000,000,000,000,056 20 1000 x 500 remote/a local/output |
| 74 |
|
|
* |
| 75 |
|
|
* It uses 20 threads to summit jobs so that there are at most 20 concurrent jobs. |
| 76 |
|
|
* Each sum (there are totally 14 sums) is partitioned into 1000 jobs. |
| 77 |
|
|
* The jobs will be executed in map-side or reduce-side. Each job has 500 parts. |
| 78 |
|
|
* The remote directory for the jobs is remote/a and the local directory |
| 79 |
|
|
* for storing output is local/output. Depends on the cluster configuration, |
| 80 |
|
|
* it may take many days to finish the entire execution. If the execution is killed, |
| 81 |
|
|
* we may resume it by |
| 82 |
|
|
* |
| 83 |
|
|
* > hadoop org.apache.hadoop.examples.pi.DistBbp \ |
| 84 |
|
|
* 1,000,000,000,000,056 20 1000 x 500 remote/b local/output |
| 85 |
|
|
*/ |
| 86 |
|
|
public final class DistBbp extends Configured implements Tool { |
| 87 |
|
|
public static final String DESCRIPTION |
| 88 |
|
|
= "A map/reduce program that uses a BBP-type formula to compute exact bits of Pi."; |
| 89 |
|
|
|
| 90 |
|
|
private final Util.Timer timer = new Util.Timer(true); |
| 91 |
|
|
|
| 92 |
|
|
/** {@inheritDoc} */ |
| 93 |
|
|
public int run(String[] args) throws Exception { |
| 94 |
|
|
//parse arguments |
| 95 |
|
|
if (args.length != DistSum.Parameters.COUNT + 1) |
| 96 |
|
|
return Util.printUsage(args, |
| 97 |
|
|
getClass().getName() + " <b> " + Parameters.LIST |
| 98 |
|
|
+ "\n <b> The number of bits to skip, i.e. compute the (b+1)th position." |
| 99 |
|
|
+ Parameters.DESCRIPTION); |
| 100 |
|
|
|
| 101 |
|
|
int i = 0; |
| 102 |
|
|
final long b = Util.string2long(args[i++]); |
| 103 |
|
|
final DistSum.Parameters parameters = DistSum.Parameters.parse(args, i); |
| 104 |
|
|
|
| 105 |
|
|
if (b < 0) |
| 106 |
|
|
throw new IllegalArgumentException("b = " + b + " < 0"); |
| 107 |
|
|
Util.printBitSkipped(b); |
| 108 |
|
|
Util.out.println(parameters); |
| 109 |
|
|
Util.out.println(); |
| 110 |
|
|
|
| 111 |
|
|
//initialize sums |
| 112 |
|
|
final DistSum distsum = new DistSum(); |
| 113 |
|
|
distsum.setConf(getConf()); |
| 114 |
|
|
distsum.setParameters(parameters); |
| 115 |
|
|
final boolean isVerbose = getConf().getBoolean(Parser.VERBOSE_PROPERTY, false); |
| 116 |
|
|
final Map<Parameter, List<TaskResult>> existings = new Parser(isVerbose).parse(parameters.localDir.getPath(), null); |
| 117 |
|
|
Parser.combine(existings); |
| 118 |
|
|
for(List<TaskResult> tr : existings.values()) |
| 119 |
|
|
Collections.sort(tr); |
| 120 |
|
|
Util.out.println(); |
| 121 |
|
|
final Map<Bellard.Parameter, Bellard.Sum> sums = Bellard.getSums(b, parameters.nJobs, existings); |
| 122 |
|
|
Util.out.println(); |
| 123 |
|
|
|
| 124 |
|
|
//execute the computations |
| 125 |
|
|
execute(distsum, sums); |
| 126 |
|
|
|
| 127 |
|
|
//compute Pi from the sums |
| 128 |
|
|
final double pi = Bellard.computePi(b, sums); |
| 129 |
|
|
Util.printBitSkipped(b); |
| 130 |
|
|
Util.out.println(Util.pi2string(pi, Bellard.bit2terms(b))); |
| 131 |
|
|
return 0; |
| 132 |
|
|
} |
| 133 |
|
|
|
| 134 |
|
|
/** Execute DistSum computations */ |
| 135 |
|
|
private void execute(DistSum distsum, |
| 136 |
|
|
final Map<Bellard.Parameter, Bellard.Sum> sums) throws Exception { |
| 137 |
|
|
final List<Computation> computations = new ArrayList<Computation>(); |
| 138 |
|
|
int i = 0; |
| 139 |
|
|
for(Bellard.Parameter p : Bellard.Parameter.values()) |
| 140 |
|
|
for(Summation s : sums.get(p)) |
| 141 |
|
|
if (s.getValue() == null) |
| 142 |
|
|
computations.add(distsum.new Computation(i++, p.toString(), s)); |
| 143 |
|
|
|
| 144 |
|
|
if (computations.isEmpty()) |
| 145 |
|
|
Util.out.println("No computation"); |
| 146 |
|
|
else { |
| 147 |
|
|
timer.tick("execute " + computations.size() + " computation(s)"); |
| 148 |
|
|
Util.execute(distsum.getParameters().nThreads, computations); |
| 149 |
|
|
timer.tick("done"); |
| 150 |
|
|
} |
| 151 |
|
|
} |
| 152 |
|
|
|
| 153 |
|
|
/** main */ |
| 154 |
|
|
public static void main(String[] args) throws Exception { |
| 155 |
|
|
System.exit(ToolRunner.run(null, new DistBbp(), args)); |
| 156 |
|
|
} |
| 157 |
|
|
} |