package org.apache.maven.scm.provider.git.gitexe.command.changelog;

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import org.apache.maven.scm.ChangeFile;
import org.apache.maven.scm.ChangeSet;
import org.apache.maven.scm.ScmFileStatus;
import org.apache.maven.scm.log.ScmLogger;
import org.apache.maven.scm.util.AbstractConsumer;
import org.apache.regexp.RE;
import org.apache.regexp.RESyntaxException;

import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.TimeZone;

/**
 * Stream consumer that turns the textual output of a git log command into a list of
 * {@link ChangeSet} entries.
 * <p>
 * The consumer is a line-driven state machine. Each call to {@link #consumeLine(String)} is
 * dispatched on the current state. Two layouts are handled: one with <code>Author:</code> and
 * <code>Date:</code> lines, and the 'raw' one with <code>tree</code>, <code>parent</code>,
 * <code>author</code> and <code>committer</code> lines, which is detected automatically when a
 * <code>tree</code> line follows the commit header.
 * <p>
 * Instances keep parsing state in fields and are meant to be fed by a single stream.
 *
 * @author <a href="mailto:struberg@yahoo.de">Mark Struberg</a>
 * @author Olivier Lamy
 * @version $Id: GitChangeLogConsumer.java 1306864 2012-03-29 13:43:18Z olamy $
 */
public class GitChangeLogConsumer
    extends AbstractConsumer
{
    /**
     * Date formatter for git timestamp
     * we use iso format cli git log --date=iso sample : 2008-08-06 01:37:18 +0200
     */
    private static final String GIT_TIMESTAMP_PATTERN = "yyyy-MM-dd HH:mm:ss Z";

    /**
     * State machine constant: expecting header
     */
    private static final int STATUS_GET_HEADER = 1;

    /**
     * State machine constant: expecting author information
     */
    private static final int STATUS_GET_AUTHOR = 2;

    /**
     * State machine constant: expecting tree hash information (raw mode)
     */
    private static final int STATUS_RAW_TREE = 21;

    /**
     * State machine constant: expecting parent hash information (raw mode)
     */
    private static final int STATUS_RAW_PARENT = 22;

    /**
     * State machine constant: expecting author name, email and timestamp information (raw mode)
     */
    private static final int STATUS_RAW_AUTHOR = 23;

    /**
     * State machine constant: expecting committer name, email and timestamp information (raw mode)
     */
    private static final int STATUS_RAW_COMMITTER = 24;

    /**
     * State machine constant: expecting date information
     */
    private static final int STATUS_GET_DATE = 3;

    /**
     * State machine constant: expecting file information
     */
    private static final int STATUS_GET_FILE = 4;

    /**
     * State machine constant: expecting comments
     */
    private static final int STATUS_GET_COMMENT = 5;

    /**
     * The pattern used to match git header lines
     */
    private static final String HEADER_PATTERN = "^commit (.*)";

    /**
     * The pattern used to match git author lines
     */
    private static final String AUTHOR_PATTERN = "^Author: (.*)";

    /**
     * The pattern used to match git tree hash lines (raw mode)
     */
    private static final String RAW_TREE_PATTERN = "^tree ([:xdigit:]+)";

    /**
     * The pattern used to match git parent hash lines (raw mode)
     */
    private static final String RAW_PARENT_PATTERN = "^parent ([:xdigit:]+)";

    /**
     * The pattern used to match git author lines (raw mode):
     * group 1 is "name &lt;email&gt;", group 2 the timestamp digits, group 3 the rest of the line
     */
    private static final String RAW_AUTHOR_PATTERN = "^author (.+ <.+>) ([:digit:]+) (.*)";

    /**
     * The pattern used to match git committer lines (raw mode)
     */
    private static final String RAW_COMMITTER_PATTERN = "^committer (.+ <.+>) ([:digit:]+) (.*)";

    /**
     * The pattern used to match git date lines
     */
    private static final String DATE_PATTERN = "^Date:\\s*(.*)";

    /**
     * The pattern used to match git file lines:
     * group 1 is the action letter, group 2 the first path, group 4 the optional second path
     */
    private static final String FILE_PATTERN =
        "^:\\d* \\d* [:xdigit:]*\\.* [:xdigit:]*\\.* ([:upper:])[:digit:]*\\t([^\\t]*)(\\t(.*))?";

    /**
     * Current status of the parser
     */
    private int status = STATUS_GET_HEADER;

    /**
     * List of change log entries
     */
    private final List<ChangeSet> entries = new ArrayList<ChangeSet>();

    /**
     * The current log entry being processed by the parser
     */
    private ChangeSet currentChange;

    /**
     * The current revision of the entry being processed by the parser
     */
    private String currentRevision;

    /**
     * The current comment of the entry being processed by the parser;
     * <code>null</code> until the blank line that precedes the comment has been seen
     */
    private StringBuilder currentComment;

    /**
     * The regular expression used to match header lines
     */
    private final RE headerRegexp;

    /**
     * The regular expression used to match author lines
     */
    private final RE authorRegexp;

    /**
     * The regular expression used to match tree hash lines in raw mode
     */
    private final RE rawTreeRegexp;

    /**
     * The regular expression used to match parent hash lines in raw mode
     */
    private final RE rawParentRegexp;

    /**
     * The regular expression used to match author lines in raw mode
     */
    private final RE rawAuthorRegexp;

    /**
     * The regular expression used to match committer lines in raw mode
     */
    private final RE rawCommitterRegexp;

    /**
     * The regular expression used to match date lines
     */
    private final RE dateRegexp;

    /**
     * The regular expression used to match file lines
     */
    private final RE fileRegexp;

    /**
     * Date format handed to <code>parseDate</code> ahead of {@link #GIT_TIMESTAMP_PATTERN}
     */
    private final String userDateFormat;

    /**
     * Creates a consumer and compiles all line patterns.
     *
     * @param logger         the logger passed on to the super class
     * @param userDateFormat date format passed to <code>parseDate</code> for <code>Date:</code> lines
     * @throws RuntimeException if one of the built-in patterns cannot be compiled
     */
    public GitChangeLogConsumer( ScmLogger logger, String userDateFormat )
    {
        super( logger );

        this.userDateFormat = userDateFormat;

        try
        {
            headerRegexp = new RE( HEADER_PATTERN );
            authorRegexp = new RE( AUTHOR_PATTERN );
            dateRegexp = new RE( DATE_PATTERN );
            fileRegexp = new RE( FILE_PATTERN );
            rawTreeRegexp = new RE( RAW_TREE_PATTERN );
            rawParentRegexp = new RE( RAW_PARENT_PATTERN );
            rawAuthorRegexp = new RE( RAW_AUTHOR_PATTERN );
            rawCommitterRegexp = new RE( RAW_COMMITTER_PATTERN );
        }
        catch ( RESyntaxException ex )
        {
            throw new RuntimeException(
                "INTERNAL ERROR: Could not create regexp to parse git log file. This shouldn't happen. Something is probably wrong with the jakarta-regexp installation.",
                ex );
        }
    }

    /**
     * Finishes the entry that is still being parsed, if any, and returns all entries collected so far.
     *
     * @return the list of parsed change sets; the returned list is the consumer's own list, not a copy
     */
    public List<ChangeSet> getModifications()
    {
        // The output may end right after the comment lines, without the blank line that normally
        // closes the comment. Store the pending comment so that it is not lost.
        if ( status == STATUS_GET_COMMENT && currentChange != null && currentComment != null )
        {
            currentChange.setComment( currentComment.toString() );
        }

        // The stream does not always end with the blank line that closes an entry,
        // so simulate it to flush the entry in progress.
        processGetFile( "" );

        return entries;
    }

    // ----------------------------------------------------------------------
    // StreamConsumer Implementation
    // ----------------------------------------------------------------------

    /**
     * {@inheritDoc}
     */
    public void consumeLine( String line )
    {
        switch ( status )
        {
            case STATUS_GET_HEADER:
                processGetHeader( line );
                break;
            case STATUS_GET_AUTHOR:
                processGetAuthor( line );
                break;
            case STATUS_GET_DATE:
                processGetDate( line, null );
                break;
            case STATUS_GET_COMMENT:
                processGetComment( line );
                break;
            case STATUS_GET_FILE:
                processGetFile( line );
                break;
            case STATUS_RAW_TREE:
                processGetRawTree( line );
                break;
            case STATUS_RAW_PARENT:
                processGetRawParent( line );
                break;
            case STATUS_RAW_AUTHOR:
                processGetRawAuthor( line );
                break;
            case STATUS_RAW_COMMITTER:
                processGetRawCommitter( line );
                break;
            default:
                throw new IllegalStateException( "Unknown state: " + status );
        }
    }

    // ----------------------------------------------------------------------
    // State handlers
    // ----------------------------------------------------------------------

    /**
     * Process the current input line in the GET_HEADER state. A matching line
     * starts a new entry carrying the revision found on it. Note,
     * Git does not have per-file revisions, instead, the entire
     * branch is given a single revision number, which is also used for
     * the revision number of each file.
     *
     * @param line A line of text from the git log output
     */
    private void processGetHeader( String line )
    {
        if ( !headerRegexp.match( line ) )
        {
            return;
        }

        currentRevision = headerRegexp.getParen( 1 );

        currentChange = new ChangeSet();

        currentChange.setRevision( currentRevision );

        status = STATUS_GET_AUTHOR;
    }

    /**
     * Process the current input line in the STATUS_GET_AUTHOR state. This
     * state gathers the author information of a log entry, or switches to the
     * raw-mode states when a tree line is found instead.
     *
     * @param line a line of text from the git log output
     */
    private void processGetAuthor( String line )
    {
        // this autodetects 'raw' format
        if ( rawTreeRegexp.match( line ) )
        {
            status = STATUS_RAW_TREE;
            processGetRawTree( line );
            return;
        }

        if ( !authorRegexp.match( line ) )
        {
            return;
        }
        String author = authorRegexp.getParen( 1 );

        currentChange.setAuthor( author );

        status = STATUS_GET_DATE;
    }

    /**
     * Process the current input line in the STATUS_RAW_TREE state. This
     * state consumes the tree hash line of a log entry; the hash itself is not stored.
     *
     * @param line a line of text from the git log output
     */
    private void processGetRawTree( String line )
    {
        if ( !rawTreeRegexp.match( line ) )
        {
            return;
        }
        //here we could set treeHash if it appears in the model: currentChange.setTreeHash( rawTreeRegexp.getParen( 1 ) );
        status = STATUS_RAW_PARENT;
    }

    /**
     * Process the current input line in the STATUS_RAW_PARENT state. This
     * state gathers parent revisions of a log entry. The first line that is
     * not a parent line is handed over to the raw author state.
     *
     * @param line a line of text from the git log output
     */
    private void processGetRawParent( String line )
    {
        if ( !rawParentRegexp.match( line ) )
        {
            status = STATUS_RAW_AUTHOR;
            processGetRawAuthor( line );
            return;
        }
        String parentHash = rawParentRegexp.getParen( 1 );

        addParentRevision( parentHash );
    }

    /**
     * In git log, both parent and merged revisions are called parent. Fortunately, the real parent comes first in the log.
     * This method takes care of the difference: the first hash becomes the parent revision,
     * every further one is recorded as a merged revision.
     *
     * @param hash a parent hash of the current entry
     */
    private void addParentRevision( String hash )
    {
        if ( currentChange.getParentRevision() == null )
        {
            currentChange.setParentRevision( hash );
        }
        else
        {
            currentChange.addMergedRevision( hash );
        }
    }

    /**
     * Process the current input line in the STATUS_RAW_AUTHOR state. This
     * state gathers the author and the author date of a log entry.
     *
     * @param line a line of text from the git log output
     */
    private void processGetRawAuthor( String line )
    {
        if ( !rawAuthorRegexp.match( line ) )
        {
            return;
        }
        String author = rawAuthorRegexp.getParen( 1 );
        currentChange.setAuthor( author );

        String datestring = rawAuthorRegexp.getParen( 2 );
        String tz = rawAuthorRegexp.getParen( 3 );

        // with --format=raw option (which gets us to this methods), date is always in seconds since beginning of time
        // even explicit --date=iso is ignored, so we ignore both userDateFormat and GIT_TIMESTAMP_PATTERN here
        // The zone only configures the Calendar; the Date taken from it is the absolute instant.
        Calendar c = Calendar.getInstance( TimeZone.getTimeZone( tz ) );
        c.setTimeInMillis( Long.parseLong( datestring ) * 1000 );
        currentChange.setDate( c.getTime() );

        status = STATUS_RAW_COMMITTER;
    }

    /**
     * Process the current input line in the STATUS_RAW_COMMITTER state. This
     * state consumes the committer line of a log entry; its content is not stored.
     *
     * @param line a line of text from the git log output
     */
    private void processGetRawCommitter( String line )
    {
        if ( !rawCommitterRegexp.match( line ) )
        {
            return;
        }
        // here we could set committer and committerDate, the same way as in processGetRawAuthor
        status = STATUS_GET_COMMENT;
    }

    /**
     * Process the current input line in the STATUS_GET_DATE state. This
     * state gathers the date information of a log entry.
     *
     * @param line   a line of text from the git log output
     * @param locale locale passed on to <code>parseDate</code>, may be <code>null</code>
     */
    private void processGetDate( String line, Locale locale )
    {
        if ( !dateRegexp.match( line ) )
        {
            return;
        }

        String datestring = dateRegexp.getParen( 1 );

        Date date = parseDate( datestring.trim(), userDateFormat, GIT_TIMESTAMP_PATTERN, locale );

        currentChange.setDate( date );

        status = STATUS_GET_COMMENT;
    }

    /**
     * Process the current input line in the GET_COMMENT state. This
     * state gathers all of the comments that are part of a log entry.
     * <p>
     * A line shorter than four characters acts as a separator: the first one opens the comment,
     * the next one closes it and moves on to the file state. Every other line contributes its
     * content after the four leading characters.
     *
     * @param line a line of text from the git log output
     */
    private void processGetComment( String line )
    {
        if ( line.length() < 4 )
        {
            if ( currentComment == null )
            {
                currentComment = new StringBuilder();
            }
            else
            {
                currentChange.setComment( currentComment.toString() );
                status = STATUS_GET_FILE;
            }
            return;
        }

        if ( currentComment == null )
        {
            // The separator before the comment has not been seen yet, so this is not a comment line
            // (presumably an additional header line). Skip it instead of failing on the missing buffer.
            return;
        }

        if ( currentComment.length() > 0 )
        {
            currentComment.append( '\n' );
        }

        currentComment.append( line.substring( 4 ) );
    }

    /**
     * Process the current input line in the GET_FILE state. This state
     * adds each file entry line to the current change log entry. Note,
     * the revision number for the entire entry is used for the revision
     * number of each file.
     * <p>
     * An empty line ends the current entry. A commit header line also ends it and immediately
     * starts the next entry; this happens when a commit lists no files at all.
     *
     * @param line A line of text from the git log output
     */
    private void processGetFile( String line )
    {
        if ( line.length() == 0 )
        {
            finishCurrentChange();
            return;
        }

        if ( headerRegexp.match( line ) )
        {
            // No file lines and no closing blank line for the previous commit:
            // close it here so the header of the next commit is not swallowed.
            finishCurrentChange();
            processGetHeader( line );
            return;
        }

        if ( !fileRegexp.match( line ) )
        {
            return;
        }

        final String actionChar = fileRegexp.getParen( 1 );
        final ScmFileStatus action = toFileStatus( actionChar );

        String name = fileRegexp.getParen( 2 );
        String originalName = null;
        String originalRevision = null;

        if ( "R".equals( actionChar ) || "C".equals( actionChar ) )
        {
            // renames and copies list the source path first and the resulting path second
            originalName = name;
            name = fileRegexp.getParen( 4 );
            originalRevision = currentChange.getParentRevision();
        }

        final ChangeFile changeFile = new ChangeFile( name, currentRevision );
        changeFile.setAction( action );
        changeFile.setOriginalName( originalName );
        changeFile.setOriginalRevision( originalRevision );
        currentChange.addFile( changeFile );
    }

    /**
     * Maps the action letter of a file line to the corresponding file status.
     *
     * @param actionChar the action letter captured from the file line
     * @return the matching status, or {@link ScmFileStatus#UNKNOWN} for any other letter
     */
    private static ScmFileStatus toFileStatus( String actionChar )
    {
        if ( "A".equals( actionChar ) )
        {
            return ScmFileStatus.ADDED;
        }
        if ( "M".equals( actionChar ) )
        {
            return ScmFileStatus.MODIFIED;
        }
        if ( "D".equals( actionChar ) )
        {
            return ScmFileStatus.DELETED;
        }
        if ( "R".equals( actionChar ) )
        {
            return ScmFileStatus.RENAMED;
        }
        if ( "C".equals( actionChar ) )
        {
            return ScmFileStatus.COPIED;
        }
        return ScmFileStatus.UNKNOWN;
    }

    /**
     * Adds the entry in progress, if any, to the result list and returns to the header state.
     */
    private void finishCurrentChange()
    {
        if ( currentChange != null )
        {
            entries.add( currentChange );
        }

        resetChangeLog();

        status = STATUS_GET_HEADER;
    }

    /**
     * Clears the per-entry parsing state.
     */
    private void resetChangeLog()
    {
        currentComment = null;
        currentChange = null;
    }
}