1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.chukwa.database;
20
21
22 import java.text.SimpleDateFormat;
23 import java.util.Calendar;
24 import java.util.Date;
25 import java.util.HashMap;
26 import java.util.Iterator;
27 import org.apache.commons.logging.Log;
28 import org.apache.commons.logging.LogFactory;
29 import org.apache.hadoop.chukwa.util.DatabaseWriter;
30
31 public class DataExpiration {
32 private static DatabaseConfig dbc = null;
33 private static Log log = LogFactory.getLog(DataExpiration.class);
34
35 public DataExpiration() {
36 if (dbc == null) {
37 dbc = new DatabaseConfig();
38 }
39 }
40
41 public void dropTables(long start, long end) {
42 String cluster = System.getProperty("CLUSTER");
43 if (cluster == null) {
44 cluster = "unknown";
45 }
46 DatabaseWriter dbw = new DatabaseWriter(cluster);
47 try {
48 HashMap<String, String> dbNames = dbc.startWith("report.db.name.");
49 Iterator<String> ki = dbNames.keySet().iterator();
50 while (ki.hasNext()) {
51 String name = ki.next();
52 String tableName = dbNames.get(name);
53 String[] tableList = dbc.findTableName(tableName, start, end);
54 for (String tl : tableList) {
55 log.debug("table name: " + tableList[0]);
56 try {
57 String[] parts = tl.split("_");
58 int partition = Integer.parseInt(parts[parts.length - 2]);
59 String table = "";
60 for (int i = 0; i < parts.length - 2; i++) {
61 if (i != 0) {
62 table = table + "_";
63 }
64 table = table + parts[i];
65 }
66 partition = partition - 3;
67 String dropPartition = "drop table if exists " + table + "_"
68 + partition + "_" + parts[parts.length - 1];
69 dbw.execute(dropPartition);
70 partition--;
71 if(partition>=0) {
72 dropPartition = "drop table if exists " + table + "_" + partition
73 + "_" + parts[parts.length - 1];
74 dbw.execute(dropPartition);
75 }
76 } catch (NumberFormatException e) {
77 log
78 .error("Error in parsing table partition number, skipping table:"
79 + tableList[0]);
80 } catch (ArrayIndexOutOfBoundsException e) {
81 log.debug("Skipping table:" + tableList[0]
82 + ", because it has no partition configuration.");
83 }
84 }
85 }
86 dbw.close();
87 } catch (Exception e) {
88 e.printStackTrace();
89 }
90 }
91
92 public static void usage() {
93 System.out.println("DataExpiration usage:");
94 System.out
95 .println("java -jar chukwa-core.jar org.apache.hadoop.chukwa.DataExpiration <date> <time window size>");
96 System.out.println(" date format: YYYY-MM-DD");
97 System.out.println(" time window size: 7, 30, 91, 365");
98 }
99
100 public static void main(String[] args) {
101 DataExpiration de = new DataExpiration();
102 long now = (new Date()).getTime();
103 long start = now;
104 long end = now;
105 if (args.length == 2) {
106 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
107 try {
108 long dataExpStart = Calendar.getInstance().getTimeInMillis();
109 start = sdf.parse(args[0]).getTime();
110 end = start + (Long.parseLong(args[1]) * 1440 * 60 * 1000L);
111 de.dropTables(start, end);
112 long dataExpEnd = Calendar.getInstance().getTimeInMillis();
113 log.info("DataExpiration for: "+args[0]+" "+args[1]+" finished: ("+(double) (dataExpEnd-dataExpStart)/1000+" seconds)");
114 } catch (Exception e) {
115 usage();
116 }
117 } else {
118 usage();
119 }
120 }
121 }