001    package org.apache.maven.scm.provider.git.gitexe.command.changelog;
002    
003    /*
004     * Licensed to the Apache Software Foundation (ASF) under one
005     * or more contributor license agreements.  See the NOTICE file
006     * distributed with this work for additional information
007     * regarding copyright ownership.  The ASF licenses this file
008     * to you under the Apache License, Version 2.0 (the
009     * "License"); you may not use this file except in compliance
010     * with the License.  You may obtain a copy of the License at
011     *
012     * http://www.apache.org/licenses/LICENSE-2.0
013     *
014     * Unless required by applicable law or agreed to in writing,
015     * software distributed under the License is distributed on an
016     * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
017     * KIND, either express or implied.  See the License for the
018     * specific language governing permissions and limitations
019     * under the License.
020     */
021    
022    import org.apache.maven.scm.ChangeFile;
023    import org.apache.maven.scm.ChangeSet;
024    import org.apache.maven.scm.ScmFileStatus;
025    import org.apache.maven.scm.log.ScmLogger;
026    import org.apache.maven.scm.util.AbstractConsumer;
027    import org.apache.regexp.RE;
028    import org.apache.regexp.RESyntaxException;
029    
030    import java.util.ArrayList;
031    import java.util.Calendar;
032    import java.util.Date;
033    import java.util.List;
034    import java.util.Locale;
035    import java.util.TimeZone;
036    
037    /**
038     * @author <a href="mailto:struberg@yahoo.de">Mark Struberg</a>
039     * @author Olivier Lamy
040     * @version $Id: GitChangeLogConsumer.java 1306864 2012-03-29 13:43:18Z olamy $
041     */
042    public class GitChangeLogConsumer
043        extends AbstractConsumer
044    {
045        /**
046         * Date formatter for git timestamp
047         * we use iso format cli git log --date=iso sample : 2008-08-06 01:37:18 +0200
048         */
049        private static final String GIT_TIMESTAMP_PATTERN = "yyyy-MM-dd HH:mm:ss Z";
050    
051        /**
052         * State machine constant: expecting header
053         */
054        private static final int STATUS_GET_HEADER = 1;
055    
056        /**
057         * State machine constant: expecting author information
058         */
059        private static final int STATUS_GET_AUTHOR = 2;
060    
061        /**
062         * State machine constant: expecting parent hash information
063         */
064        private static final int STATUS_RAW_TREE = 21;
065    
066        /**
067         * State machine constant: expecting parent hash information
068         */
069        private static final int STATUS_RAW_PARENT = 22;
070    
071        /**
072         * State machine constant: expecting author name, email and timestamp information
073         */
074        private static final int STATUS_RAW_AUTHOR = 23;
075    
076        /**
077         * State machine constant: expecting committer name, email and timestamp information
078         */
079        private static final int STATUS_RAW_COMMITTER = 24;
080    
081        /**
082         * State machine constant: expecting date information
083         */
084        private static final int STATUS_GET_DATE = 3;
085    
086        /**
087         * State machine constant: expecting file information
088         */
089        private static final int STATUS_GET_FILE = 4;
090    
091        /**
092         * State machine constant: expecting comments
093         */
094        private static final int STATUS_GET_COMMENT = 5;
095    
096        /**
097         * The pattern used to match git header lines
098         */
099        private static final String HEADER_PATTERN = "^commit (.*)";
100    
101        /**
102         * The pattern used to match git author lines
103         */
104        private static final String AUTHOR_PATTERN = "^Author: (.*)";
105    
106        /**
107         * The pattern used to match git tree hash lines (raw mode)
108         */
109        private static final String RAW_TREE_PATTERN = "^tree ([:xdigit:]+)";
110    
111        /**
112         * The pattern used to match git parent hash lines (raw mode)
113         */
114        private static final String RAW_PARENT_PATTERN = "^parent ([:xdigit:]+)";
115    
116        /**
117         * The pattern used to match git author lines (raw mode)
118         */
119        private static final String RAW_AUTHOR_PATTERN = "^author (.+ <.+>) ([:digit:]+) (.*)";
120    
121        /**
122         * The pattern used to match git author lines (raw mode)
123         */
124        private static final String RAW_COMMITTER_PATTERN = "^committer (.+ <.+>) ([:digit:]+) (.*)";
125    
126        /**
127         * The pattern used to match git date lines
128         */
129        private static final String DATE_PATTERN = "^Date:\\s*(.*)";
130    
131        /**
132         * The pattern used to match git file lines
133         */
134        private static final String FILE_PATTERN =
135            "^:\\d* \\d* [:xdigit:]*\\.* [:xdigit:]*\\.* ([:upper:])[:digit:]*\\t([^\\t]*)(\\t(.*))?";
136    
137        /**
138         * Current status of the parser
139         */
140        private int status = STATUS_GET_HEADER;
141    
142        /**
143         * List of change log entries
144         */
145        private List<ChangeSet> entries = new ArrayList<ChangeSet>();
146    
147        /**
148         * The current log entry being processed by the parser
149         */
150        private ChangeSet currentChange;
151    
152        /**
153         * The current revision of the entry being processed by the parser
154         */
155        private String currentRevision;
156    
157        /**
158         * The current comment of the entry being processed by the parser
159         */
160        private StringBuilder currentComment;
161    
162        /**
163         * The regular expression used to match header lines
164         */
165        private RE headerRegexp;
166    
167        /**
168         * The regular expression used to match author lines
169         */
170        private RE authorRegexp;
171    
172        /**
173         * The regular expression used to match tree hash lines in raw mode
174         */
175        private RE rawTreeRegexp;
176    
177        /**
178         * The regular expression used to match parent hash lines in raw mode
179         */
180        private RE rawParentRegexp;
181    
182        /**
183         * The regular expression used to match author lines in raw mode
184         */
185        private RE rawAuthorRegexp;
186    
187        /**
188         * The regular expression used to match committer lines in raw mode
189         */
190        private RE rawCommitterRegexp;
191    
192        /**
193         * The regular expression used to match date lines
194         */
195        private RE dateRegexp;
196    
197        /**
198         * The regular expression used to match file lines
199         */
200        private RE fileRegexp;
201    
202        private String userDateFormat;
203    
204        /**
205         * Default constructor.
206         */
207        public GitChangeLogConsumer( ScmLogger logger, String userDateFormat )
208        {
209            super( logger );
210    
211            this.userDateFormat = userDateFormat;
212    
213            try
214            {
215                headerRegexp = new RE( HEADER_PATTERN );
216                authorRegexp = new RE( AUTHOR_PATTERN );
217                dateRegexp = new RE( DATE_PATTERN );
218                fileRegexp = new RE( FILE_PATTERN );
219                rawTreeRegexp = new RE( RAW_TREE_PATTERN );
220                rawParentRegexp = new RE( RAW_PARENT_PATTERN );
221                rawAuthorRegexp = new RE( RAW_AUTHOR_PATTERN );
222                rawCommitterRegexp = new RE( RAW_COMMITTER_PATTERN );
223            }
224            catch ( RESyntaxException ex )
225            {
226                throw new RuntimeException(
227                    "INTERNAL ERROR: Could not create regexp to parse git log file. This shouldn't happen. Something is probably wrong with the oro installation.",
228                    ex );
229            }
230        }
231    
232        public List<ChangeSet> getModifications()
233        {
234            // this is needed since the processFile does not always get a the end-sequence correctly.
235            processGetFile( "" );
236    
237            return entries;
238        }
239    
240        // ----------------------------------------------------------------------
241        // StreamConsumer Implementation
242        // ----------------------------------------------------------------------
243    
244        /**
245         * {@inheritDoc}
246         */
247        public void consumeLine( String line )
248        {
249            switch ( status )
250            {
251                case STATUS_GET_HEADER:
252                    processGetHeader( line );
253                    break;
254                case STATUS_GET_AUTHOR:
255                    processGetAuthor( line );
256                    break;
257                case STATUS_GET_DATE:
258                    processGetDate( line, null );
259                    break;
260                case STATUS_GET_COMMENT:
261                    processGetComment( line );
262                    break;
263                case STATUS_GET_FILE:
264                    processGetFile( line );
265                    break;
266                case STATUS_RAW_TREE:
267                    processGetRawTree( line );
268                    break;
269                case STATUS_RAW_PARENT:
270                    processGetRawParent( line );
271                    break;
272                case STATUS_RAW_AUTHOR:
273                    processGetRawAuthor( line );
274                    break;
275                case STATUS_RAW_COMMITTER:
276                    processGetRawCommitter( line );
277                    break;
278                default:
279                    throw new IllegalStateException( "Unknown state: " + status );
280            }
281        }
282    
283        // ----------------------------------------------------------------------
284        //
285        // ----------------------------------------------------------------------
286    
287        /**
288         * Process the current input line in the GET_HEADER state.  The
289         * author, date, and the revision of the entry are gathered.  Note,
290         * Git does not have per-file revisions, instead, the entire
291         * branch is given a single revision number, which is also used for
292         * the revision number of each file.
293         *
294         * @param line A line of text from the git log output
295         */
296        private void processGetHeader( String line )
297        {
298            if ( !headerRegexp.match( line ) )
299            {
300                return;
301            }
302    
303            currentRevision = headerRegexp.getParen( 1 );
304    
305            currentChange = new ChangeSet();
306    
307            currentChange.setRevision( currentRevision );
308    
309            status = STATUS_GET_AUTHOR;
310        }
311    
312        /**
313         * Process the current input line in the STATUS_GET_AUTHOR state.  This
314         * state gathers all of the author information that are part of a log entry.
315         *
316         * @param line a line of text from the git log output
317         */
318        private void processGetAuthor( String line )
319        {
320            // this autodetects 'raw' format
321            if ( rawTreeRegexp.match( line ) )
322            {
323                status = STATUS_RAW_TREE;
324                processGetRawTree( line );
325                return;
326            }
327    
328            if ( !authorRegexp.match( line ) )
329            {
330                return;
331            }
332            String author = authorRegexp.getParen( 1 );
333    
334            currentChange.setAuthor( author );
335    
336            status = STATUS_GET_DATE;
337        }
338    
339        /**
340         * Process the current input line in the STATUS_RAW_TREE state.  This
341         * state gathers tree hash part of a log entry.
342         *
343         * @param line a line of text from the git log output
344         */
345        private void processGetRawTree( String line )
346        {
347            if ( !rawTreeRegexp.match( line ) )
348            {
349                return;
350            }
351            //here we could set treeHash if it appears in the model: currentChange.setTreeHash( rawTreeRegexp.getParen( 1 ) );
352            status = STATUS_RAW_PARENT;
353        }
354    
355        /**
356         * Process the current input line in the STATUS_RAW_PARENT state.  This
357         * state gathers parent revisions of a log entry.
358         *
359         * @param line a line of text from the git log output
360         */
361        private void processGetRawParent( String line )
362        {
363            if ( !rawParentRegexp.match( line ) )
364            {
365                status = STATUS_RAW_AUTHOR;
366                processGetRawAuthor( line );
367                return;
368            }
369            String parentHash = rawParentRegexp.getParen( 1 );
370    
371            addParentRevision( parentHash );
372        }
373    
374        /**
375         * In git log, both parent and merged revisions are called parent. Fortunately, the real parent comes first in the log.
376         * This method takes care of the difference.
377         *
378         * @param hash -
379         */
380        private void addParentRevision( String hash )
381        {
382            if ( currentChange.getParentRevision() == null )
383            {
384                currentChange.setParentRevision( hash );
385            }
386            else
387            {
388                currentChange.addMergedRevision( hash );
389            }
390        }
391    
392        /**
393         * Process the current input line in the STATUS_RAW_AUTHOR state.  This
394         * state gathers all the author information of a log entry.
395         *
396         * @param line a line of text from the git log output
397         */
398        private void processGetRawAuthor( String line )
399        {
400            if ( !rawAuthorRegexp.match( line ) )
401            {
402                return;
403            }
404            String author = rawAuthorRegexp.getParen( 1 );
405            currentChange.setAuthor( author );
406    
407            String datestring = rawAuthorRegexp.getParen( 2 );
408            String tz = rawAuthorRegexp.getParen( 3 );
409    
410            // with --format=raw option (which gets us to this methods), date is always in seconds since beginning of time
411            // even explicit --date=iso is ignored, so we ignore both userDateFormat and GIT_TIMESTAMP_PATTERN here
412            Calendar c = Calendar.getInstance( TimeZone.getTimeZone( tz ) );
413            c.setTimeInMillis( Long.parseLong( datestring ) * 1000 );
414            currentChange.setDate( c.getTime() );
415    
416            status = STATUS_RAW_COMMITTER;
417        }
418    
419        /**
420         * Process the current input line in the STATUS_RAW_AUTHOR state.  This
421         * state gathers all the committer information of a log entry.
422         *
423         * @param line a line of text from the git log output
424         */
425        private void processGetRawCommitter( String line )
426        {
427            if ( !rawCommitterRegexp.match( line ) )
428            {
429                return;
430            }
431            // here we could set committer and committerDate, the same way as in processGetRawAuthor
432            status = STATUS_GET_COMMENT;
433        }
434    
435        /**
436         * Process the current input line in the STATUS_GET_DATE state.  This
437         * state gathers all of the date information that are part of a log entry.
438         *
439         * @param line a line of text from the git log output
440         */
441        private void processGetDate( String line, Locale locale )
442        {
443            if ( !dateRegexp.match( line ) )
444            {
445                return;
446            }
447    
448            String datestring = dateRegexp.getParen( 1 );
449    
450            Date date = parseDate( datestring.trim(), userDateFormat, GIT_TIMESTAMP_PATTERN, locale );
451    
452            currentChange.setDate( date );
453    
454            status = STATUS_GET_COMMENT;
455        }
456    
457        /**
458         * Process the current input line in the GET_COMMENT state.  This
459         * state gathers all of the comments that are part of a log entry.
460         *
461         * @param line a line of text from the git log output
462         */
463        private void processGetComment( String line )
464        {
465            if ( line.length() < 4 )
466            {
467                if ( currentComment == null )
468                {
469                    currentComment = new StringBuilder();
470                }
471                else
472                {
473                    currentChange.setComment( currentComment.toString() );
474                    status = STATUS_GET_FILE;
475                }
476            }
477            else
478            {
479                if ( currentComment.length() > 0 )
480                {
481                    currentComment.append( '\n' );
482                }
483    
484                currentComment.append( line.substring( 4 ) );
485            }
486        }
487    
488        /**
489         * Process the current input line in the GET_FILE state.  This state
490         * adds each file entry line to the current change log entry.  Note,
491         * the revision number for the entire entry is used for the revision
492         * number of each file.
493         *
494         * @param line A line of text from the git log output
495         */
496        private void processGetFile( String line )
497        {
498            if ( line.length() == 0 )
499            {
500                if ( currentChange != null )
501                {
502                    entries.add( currentChange );
503                }
504    
505                resetChangeLog();
506    
507                status = STATUS_GET_HEADER;
508            }
509            else
510            {
511                if ( !fileRegexp.match( line ) )
512                {
513                    return;
514                }
515                final String actionChar = fileRegexp.getParen( 1 );
516                // action is currently not used
517                final ScmFileStatus action;
518                String name = fileRegexp.getParen( 2 );
519                String originalName = null;
520                String originalRevision = null;
521                if ( "A".equals( actionChar ) )
522                {
523                    action = ScmFileStatus.ADDED;
524                }
525                else if ( "M".equals( actionChar ) )
526                {
527                    action = ScmFileStatus.MODIFIED;
528                }
529                else if ( "D".equals( actionChar ) )
530                {
531                    action = ScmFileStatus.DELETED;
532                }
533                else if ( "R".equals( actionChar ) )
534                {
535                    action = ScmFileStatus.RENAMED;
536                    originalName = name;
537                    name = fileRegexp.getParen( 4 );
538                    originalRevision = currentChange.getParentRevision();
539                }
540                else if ( "C".equals( actionChar ) )
541                {
542                    action = ScmFileStatus.COPIED;
543                    originalName = name;
544                    name = fileRegexp.getParen( 4 );
545                    originalRevision = currentChange.getParentRevision();
546                }
547                else
548                {
549                    action = ScmFileStatus.UNKNOWN;
550                }
551    
552                final ChangeFile changeFile = new ChangeFile( name, currentRevision );
553                changeFile.setAction( action );
554                changeFile.setOriginalName( originalName );
555                changeFile.setOriginalRevision( originalRevision );
556                currentChange.addFile( changeFile );
557            }
558        }
559    
560        private void resetChangeLog()
561        {
562            currentComment = null;
563            currentChange = null;
564        }
565    }