JobLog

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.chukwa.extraction.demux.processor.mapper;

import java.util.ArrayList;

import java.util.HashMap;
import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.hadoop.chukwa.datacollection.writer.hbase.Annotation.Table;
import org.apache.hadoop.chukwa.datacollection.writer.hbase.Annotation.Tables;
import org.apache.hadoop.chukwa.extraction.engine.ChukwaRecord;
import org.apache.hadoop.chukwa.extraction.engine.ChukwaRecordKey;
import org.apache.hadoop.mapred.JobHistory;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

@Tables(annotations={
@Table(name="Mapreduce",columnFamily="JobData"),
@Table(name="Mapreduce",columnFamily="TaskData")
})
public class JobLog extends AbstractProcessor {
  /** Text of the job log entry assembled so far from one or more input lines. */
  private String savedLines = "";

  /**
   * Assembles a complete job log entry from input that may be split across
   * several calls.
   *
   * <p>A {@code null}, blank or {@code Meta} entry clears the buffered text and
   * yields {@code null}. An entry starting with {@code Job}, {@code Task},
   * {@code MapAttempt} or {@code ReduceAttempt} starts a new log line and
   * replaces the buffer; any other text is appended to the buffer as a
   * continuation. The buffered text is parsed once it ends with {@code "} or
   * {@code " .}. The buffer is not cleared after a line has been returned, so
   * continuation text that follows is appended to the line just returned.
   *
   * @param recordEntry the next raw log line; may be {@code null}
   * @return An object of JobLogLine if a full job log is found. Null otherwise.
   */
  public JobLogLine getJobLogLine(String recordEntry) {
    String entry = (recordEntry == null) ? "" : recordEntry.trim();

    // Meta lines are skipped entirely, so no later check needs to consider them.
    if (entry.length() == 0 || entry.startsWith("Meta")) {
      savedLines = "";
      return null;
    }

    boolean startsNewLine = entry.startsWith("Job")
        || entry.startsWith("Task")
        || entry.startsWith("MapAttempt")
        || entry.startsWith("ReduceAttempt");
    savedLines = startsNewLine ? entry : savedLines + entry;

    // A finished log line ends with a closing quote, optionally followed by " ."
    boolean complete = savedLines.endsWith("\"") || savedLines.endsWith("\" .");
    if (!complete) {
      return null;
    }
    return new JobLogLine(savedLines);
  }

	/**
	 * Converts one raw job log entry into a ChukwaRecord and emits it.
	 *
	 * <p>Nothing is emitted until {@link #getJobLogLine(String)} reports a
	 * complete line, nor for lines whose log type is not {@code Meta},
	 * {@code JobData} or {@code TaskData}. A {@code Meta} line is only kept when
	 * the chunk's stream name contains a job id, and is then relabelled as
	 * {@code JobData}. The record carries every attribute of the line plus its
	 * flattened counters.
	 *
	 * @param recordEntry raw log text handed over by the framework
	 * @param output collector receiving the generated key/record pair
	 * @param reporter not used by this processor
	 */
	@Override
	protected void parse(String recordEntry,
			OutputCollector<ChukwaRecordKey, ChukwaRecord> output,
			Reporter reporter) throws Throwable {
		JobLogLine line = getJobLogLine(recordEntry);
		if (line == null) {
			return;
		}

		boolean isMeta = line.getLogType().equals("Meta");
		if (!isMeta
				&& !line.getLogType().equals("JobData")
				&& !line.getLogType().equals("TaskData")) {
			return;
		}

		if (isMeta) {
			String streamName = chunk.getStreamName();
			if (streamName == null) {
				return;
			}
			// The job id itself is not used here; it only has to be present in the name.
			if (JobLogFileName.getJobIdFromFileName(streamName) == null) {
				return;
			}
			line.setLogType("JobData");
		}

		key = new ChukwaRecordKey();
		ChukwaRecord record = new ChukwaRecord();
		this.buildGenericRecord(record, null, -1L, line.getLogType());

		// plain KEY="value" attributes of the log line
		for (Entry<String, String> attribute : line.entrySet()) {
			record.add(attribute.getKey(), attribute.getValue());
		}

		// counters, flattened to "Counter:<group>:<name>" fields
		HashMap<String, Long> counters = line.getCounterHash().flat();
		for (Entry<String, Long> counter : counters.entrySet()) {
			record.add(counter.getKey(), counter.getValue().toString());
		}

		long timestamp = line.getTimestamp();
		record.setTime(timestamp);
		key.setKey(getKey(timestamp, line.getJobId()));
		output.collect(key, record);
	}

	/**
	 * Builds the record key {@code <hour start>/<job id>/<timestamp>}.
	 *
	 * @param ts timestamp in milliseconds; when it is 0 the time partition of
	 *           {@code archiveKey} is used instead
	 * @param jobId job id placed in the middle of the key
	 * @return the key, whose first part is the timestamp rounded down to a
	 *         multiple of one hour
	 */
	private String getKey(long ts, String jobId) {
		final long hourMillis = 60 * 60 * 1000;
		long effectiveTs = (ts == 0) ? archiveKey.getTimePartition() : ts;
		long hourStart = effectiveTs - (effectiveTs % hourMillis);
		return hourStart + "/" + jobId + "/" + effectiveTs;
	}

	/**
	 * One job log line parsed into its {@code KEY="value"} attributes (the map
	 * entries), together with the derived log type, job id, task id and counters.
	 */
	public static class JobLogLine extends HashMap<String, String> {
		private static final long serialVersionUID = 4902948603527677036L;

		/** Attributes searched, in this order, for the timestamp of a line. */
		private static final String[] timestampKeys = {
			JobHistory.Keys.SUBMIT_TIME.toString(),
			JobHistory.Keys.LAUNCH_TIME.toString(),
			JobHistory.Keys.START_TIME.toString(),
			JobHistory.Keys.FINISH_TIME.toString(),
		};

		/**
		 * Largest timestamp seen so far by {@link #getTimestamp()}. The field is
		 * static and unsynchronized, so it is shared by every JobLogLine instance.
		 */
		private static long lastTimestamp = 0L;

		private String logType;
		private String jobId;
		private String taskId;
		private CounterHash counterHash;

		/**
		 * Parses a job log line.
		 *
		 * example lines:
		 * 		Task TASKID="task_200903062215_0577_r_000000" TASK_TYPE="REDUCE" START_TIME="1236386538540" SPLITS="" .
		 *		Job JOBID="job_200903062215_0577" JOB_PRIORITY="NORMAL" .
		 *		Job JOBID="job_200903062215_0577" LAUNCH_TIME="1236386526545" TOTAL_MAPS="14" TOTAL_REDUCES="1" JOB_STATUS="PREP" .
		 *
		 * <p>The leading word becomes the log type: {@code Job} is mapped to
		 * {@code JobData}; {@code Task}, {@code MapAttempt} and
		 * {@code ReduceAttempt} are mapped to {@code TaskData}; anything else is
		 * kept unchanged. Besides the attributes found on the line, the map
		 * receives {@code JOBID} (derived from the task id when one is present),
		 * {@code TASK_ATTEMPT_TIMES} (when {@code TASK_ATTEMPT_ID} is present) and
		 * {@code JOB_FINAL_STATUS} (for job lines that have a finish time).
		 * Timestamp attributes whose value is {@code "0"} are removed.
		 *
		 * @param line the complete log line; a blank line produces an empty map
		 */
		public JobLogLine(String line) {
			line = line.trim();
			if (line.length() == 0) {
				// Nothing to parse, but still hand out a usable (empty) counter set.
				counterHash = new CounterHash(null);
				return;
			}

			// Splitting on =" leaves pieces shaped like:  value" NEXT_KEY
			String key = null;
			String[] pairs = line.split("=\"");
			for (int i = 0; i < pairs.length; i++) {
				String piece = pairs[i];
				if (i == 0) {
					// The first piece is "<log type> <first key>".
					String[] head = piece.split("\\s+");
					logType = toRecordType(head[0]);
					if (head.length > 1) {
						key = head[1];
					}
					continue;
				}

				// The last quote closes the value, so quotes inside the value are kept.
				int pos = piece.lastIndexOf('"');
				put(key, piece.substring(0, pos));
				if (i < pairs.length - 1) {
					// The text after the closing quote is the next key; trim() accepts
					// any amount of whitespace between the quote and the key.
					key = piece.substring(pos + 1).trim();
				}
			}

			// jobid format: job_200903062215_0577
			jobId = get(JobHistory.Keys.JOBID.toString());

			// taskid format: task_200903062215_0577_r_000000
			taskId = get(JobHistory.Keys.TASKID.toString());
			if (taskId != null) {
				String[] fields = taskId.split("_");
				jobId = "job_" + fields[1] + "_" + fields[2];
				put(JobHistory.Keys.JOBID.toString(), jobId);
				// drop the first five characters, i.e. the "task_" prefix
				taskId = taskId.substring(5);
			}

			counterHash = new CounterHash(get(JobHistory.Keys.COUNTERS.toString()));

			String attemptId = get("TASK_ATTEMPT_ID");
			if (attemptId != null) {
				put("TASK_ATTEMPT_TIMES", String.valueOf(attemptNumber(attemptId)));
			}

			if ("JobData".equals(logType) && get(JobHistory.Keys.FINISH_TIME.toString()) != null) {
				put("JOB_FINAL_STATUS", get("JOB_STATUS"));
			}

			// Drop timestamps recorded as "0" so that getTimestamp() does not pick them up.
			for (String timeKey : timestampKeys) {
				if ("0".equals(get(timeKey))) {
					remove(timeKey);
				}
			}
		}

		/** Maps the leading word of a log line to the record type used for it. */
		private static String toRecordType(String type) {
			if (type.equals("Job")) {
				return "JobData";
			}
			if (type.equals("Task") || type.equals("MapAttempt") || type.equals("ReduceAttempt")) {
				return "TaskData";
			}
			return type;
		}

		/**
		 * Computes the 1-based attempt number from the last '_'-separated field of
		 * an attempt id, or -1 when that field is missing or not a number.
		 */
		private static int attemptNumber(String attemptId) {
			String[] elems = attemptId.split("_");
			if (elems.length == 0) {
				return -1;
			}
			try {
				// Parse first, then add: "1" must give 2, not the number 11.
				return Integer.parseInt(elems[elems.length - 1]) + 1;
			} catch (NumberFormatException e) {
				return -1;
			}
		}

		/** @return the log type, e.g. {@code JobData} or {@code TaskData} */
		public String getLogType() {
			return logType;
		}

		/** @param logType the new log type of this line */
		public void setLogType(String logType) {
			this.logType = logType;
		}

		/** @return the job id, or {@code null} if the line has neither JOBID nor TASKID */
		public String getJobId() {
			return jobId;
		}

		/** @return the task id without its first five characters, or {@code null} if the line has no TASKID */
		public String getTaskId() {
			return taskId;
		}

		/**
		 * Searches the timestamp attributes of this line. The first non-empty one
		 * found raises the shared last-seen timestamp if it is larger.
		 *
		 * @return the largest timestamp seen so far; this is the last seen one
		 *         when the line has no timestamp or carries an older one
		 * @throws NumberFormatException if the timestamp attribute is not a number
		 */
		public long getTimestamp() {
			for (String key : timestampKeys) {
				String value = get(key);
				if (value != null && value.length() != 0) {
					long ts = Long.parseLong(value);
					if (ts > lastTimestamp) {
						lastTimestamp = ts;
					}
					break;
				}
			}
			return lastTimestamp;
		}

		/** @return the counters parsed from the COUNTERS attribute; empty if there were none */
		public CounterHash getCounterHash() {
			return counterHash;
		}

		/**
		 * @return the numeric suffix of TASK_ATTEMPT_ID plus one, or -1 when the
		 *         attribute is absent or its suffix is not a number
		 */
		public int getAttempts() {
			String attemptId = get("TASK_ATTEMPT_ID");
			return (attemptId == null) ? -1 : attemptNumber(attemptId);
		}
	}

	/**
	 * Parse counter string to object, stored as
	 * group id -> (counter id -> value).
	 *
	 * A string starting with '{' is read as a list of groups, as in the example
	 * below. Any other string is read as comma separated "name:value" pairs,
	 * which are all stored under the group "Hadoop18".
	 *
	 * Example string:
	 * {(org\.apache\.hadoop\.mapred\.JobInProgress$Counter)(Job Counters )
		    [(TOTAL_LAUNCHED_REDUCES)(Launched reduce tasks)(1)]
		    [(TOTAL_LAUNCHED_MAPS)(Launched map tasks)(14)]
		    [(DATA_LOCAL_MAPS)(Data-local map tasks)(14)]
		}
		{(FileSystemCounters)(FileSystemCounters)
		    [(FILE_BYTES_READ)(FILE_BYTES_READ)(132)]
		    [(HDFS_BYTES_READ)(HDFS_BYTES_READ)(20471)]
		    [(FILE_BYTES_WRITTEN)(FILE_BYTES_WRITTEN)(790)]
		    [(HDFS_BYTES_WRITTEN)(HDFS_BYTES_WRITTEN)(248)]
		}
	 */
	public static class CounterHash extends HashMap<String, HashMap<String, Long>>{
		/**
		 * @param str the counter string; {@code null} produces an empty object
		 * @throws NumberFormatException if a counter value is not a number
		 */
		public CounterHash(String str) {
			if(str == null) {
				return;
			}

			if(str.startsWith("{")) {
				parseGroups(str);
			} else {
				parseNameValueList(str);
			}
		}

		/** Reads the "{(group id)(display name)[(id)(display name)(value)]...}" format. */
		private void parseGroups(String str) {
			for(String group : split(str, "[{}]")) {
				HashMap<String, Long> hash = null;
				for(String token : split(group, "[\\[\\]]")) {
					ArrayList<String> parts = split(token, "[\\(\\)]");
					if(hash == null) {
						// The first token of a group is its header: (group id)(display name)
						if(parts.isEmpty()) {
							break; // no group id to store the counters under
						}
						hash = new HashMap<String, Long>();
						// Undo the "\." escaping of the group id. A literal replace is
						// used so that no other escaped character is touched.
						String groupId = parts.get(0).replace("\\.", ".");
						put(groupId, hash);
					}
					else if(parts.size() >= 2) {
						// (counter id)(display name)(value): empty fields are dropped by
						// split(), so the value is read from the last position rather than
						// a fixed index that an empty display name would shift.
						hash.put(parts.get(0), Long.parseLong(parts.get(parts.size() - 1)));
					}
				}
			}
		}

		/** Reads the "name:value,name:value" format into the "Hadoop18" group. */
		private void parseNameValueList(String str) {
			HashMap<String, Long> hash = new HashMap<String, Long>();
			put("Hadoop18", hash);
			for(String counter : split(str, ",")) {
				ArrayList<String> kv = split(counter, ":");
				if(kv.size() >= 2) { // skip entries without a value
					hash.put(kv.get(0), Long.parseLong(kv.get(1)));
				}
			}
		}

		/**
		 * Flattens the counter groups into a single new map whose keys have the
		 * form "Counter:" + group id + ":" + counter id.
		 *
		 * For the example mentioned in the class comment, the result will be
		 * <pre>
		 * Counter:org.apache.hadoop.mapred.JobInProgress$Counter:TOTAL_LAUNCHED_REDUCES=1
		 * Counter:org.apache.hadoop.mapred.JobInProgress$Counter:TOTAL_LAUNCHED_MAPS=14
		 * Counter:org.apache.hadoop.mapred.JobInProgress$Counter:DATA_LOCAL_MAPS=14
		 * Counter:FileSystemCounters:FILE_BYTES_READ=132
		 * Counter:FileSystemCounters:HDFS_BYTES_READ=20471
		 * Counter:FileSystemCounters:FILE_BYTES_WRITTEN=790
		 * Counter:FileSystemCounters:HDFS_BYTES_WRITTEN=248
		 * </pre>
		 *
		 * @return a new map holding every counter of every group
		 */
		public HashMap<String, Long> flat() {
			HashMap<String, Long> result = new HashMap<String, Long>();
			for(Entry<String, HashMap<String, Long>> entry : entrySet()) {
				String id = entry.getKey();
				for(Entry<String, Long> counterValue : entry.getValue().entrySet()) {
					result.put("Counter:" + id + ":" + counterValue.getKey(), counterValue.getValue());
				}
			}
			return result;
		}
	}

	/**
	 * Splits a string around matches of a regular expression and keeps only the
	 * non-empty fields, in their original order.
	 *
	 * @param s the text to split; must not be {@code null}
	 * @param regex the delimiting regular expression
	 * @return a new list with the non-empty fields; empty if there are none
	 */
	public static ArrayList<String> split(String s, String regex) {
		String[] fields = s.split(regex);
		ArrayList<String> kept = new ArrayList<String>(fields.length);
		for (int i = 0; i < fields.length; i++) {
			if (!fields[i].isEmpty()) {
				kept.add(fields[i]);
			}
		}
		return kept;
	}

	/** Extracts the job id from the name of a job log file. */
	private static class JobLogFileName {
		/** A job id: "job_", digits, "_", digits. */
		private static final Pattern JOB_ID = Pattern.compile("job_[0-9]+_[0-9]+");

		/**
		 * Finds the first job id contained in a file name.
		 *
		 * @param name the file name to search; must not be {@code null}
		 * @return the first job id found in the name, or {@code null} if there is none
		 */
		public static String getJobIdFromFileName(String name) {
			Matcher jobId = JOB_ID.matcher(name);
			return jobId.find() ? jobId.group() : null;
		}
	}

}