001package org.apache.maven.scm.provider.git.gitexe.command.changelog;
002
003/*
004 * Licensed to the Apache Software Foundation (ASF) under one
005 * or more contributor license agreements.  See the NOTICE file
006 * distributed with this work for additional information
007 * regarding copyright ownership.  The ASF licenses this file
008 * to you under the Apache License, Version 2.0 (the
009 * "License"); you may not use this file except in compliance
010 * with the License.  You may obtain a copy of the License at
011 *
012 * http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing,
015 * software distributed under the License is distributed on an
016 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
017 * KIND, either express or implied.  See the License for the
018 * specific language governing permissions and limitations
019 * under the License.
020 */
021
022import org.apache.maven.scm.ChangeFile;
023import org.apache.maven.scm.ChangeSet;
024import org.apache.maven.scm.ScmFileStatus;
025import org.apache.maven.scm.log.ScmLogger;
026import org.apache.maven.scm.util.AbstractConsumer;
027
028import java.util.ArrayList;
029import java.util.Calendar;
030import java.util.Date;
031import java.util.List;
032import java.util.Locale;
033import java.util.TimeZone;
034import java.util.regex.Matcher;
035import java.util.regex.Pattern;
036
037/**
038 * @author <a href="mailto:struberg@yahoo.de">Mark Struberg</a>
039 * @author Olivier Lamy
040 *
041 */
042public class GitChangeLogConsumer
043    extends AbstractConsumer
044{
045    /**
046     * Date formatter for git timestamp
047     * we use iso format cli git log --date=iso sample : 2008-08-06 01:37:18 +0200
048     */
049    private static final String GIT_TIMESTAMP_PATTERN = "yyyy-MM-dd HH:mm:ss Z";
050
051    /**
052     * State machine constant: expecting header
053     */
054    private static final int STATUS_GET_HEADER = 1;
055
056    /**
057     * State machine constant: expecting author information
058     */
059    private static final int STATUS_GET_AUTHOR = 2;
060
061    /**
062     * State machine constant: expecting parent hash information
063     */
064    private static final int STATUS_RAW_TREE = 21;
065
066    /**
067     * State machine constant: expecting parent hash information
068     */
069    private static final int STATUS_RAW_PARENT = 22;
070
071    /**
072     * State machine constant: expecting author name, email and timestamp information
073     */
074    private static final int STATUS_RAW_AUTHOR = 23;
075
076    /**
077     * State machine constant: expecting committer name, email and timestamp information
078     */
079    private static final int STATUS_RAW_COMMITTER = 24;
080
081    /**
082     * State machine constant: expecting date information
083     */
084    private static final int STATUS_GET_DATE = 3;
085
086    /**
087     * State machine constant: expecting file information
088     */
089    private static final int STATUS_GET_FILE = 4;
090
091    /**
092     * State machine constant: expecting comments
093     */
094    private static final int STATUS_GET_COMMENT = 5;
095
096    /**
097     * The pattern used to match git header lines
098     */
099    private static final Pattern HEADER_PATTERN = Pattern.compile( "^commit (.*)" );
100
101    /**
102     * The pattern used to match git author lines
103     */
104    private static final Pattern AUTHOR_PATTERN = Pattern.compile( "^Author: (.*)" );
105
106    /**
107     * The pattern used to match git tree hash lines (raw mode)
108     */
109    private static final Pattern RAW_TREE_PATTERN = Pattern.compile( "^tree ([A-Fa-f0-9]+)" ); 
110
111    /**
112     * The pattern used to match git parent hash lines (raw mode)
113     */
114    private static final Pattern RAW_PARENT_PATTERN = Pattern.compile( "^parent ([A-Fa-f0-9]+)" );
115
116    /**
117     * The pattern used to match git author lines (raw mode)
118     */
119    private static final Pattern RAW_AUTHOR_PATTERN = Pattern.compile( "^author (.+ <.+>) ([0-9]+) (.*)" );
120
121    /**
122     * The pattern used to match git author lines (raw mode)
123     */
124    private static final Pattern RAW_COMMITTER_PATTERN = Pattern.compile( "^committer (.+ <.+>) ([0-9]+) (.*)" );
125
126    /**
127     * The pattern used to match git date lines
128     */
129    private static final Pattern DATE_PATTERN = Pattern.compile( "^Date:\\s*(.*)" );
130
131    /**
132     * The pattern used to match git file lines
133     */
134    private static final Pattern FILE_PATTERN =
135        Pattern.compile( "^:\\d* \\d* [A-Fa-f0-9]*\\.* [A-Fa-f0-9]*\\.* ([A-Z])[0-9]*\\t([^\\t]*)(\\t(.*))?" );
136
137    /**
138     * Current status of the parser
139     */
140    private int status = STATUS_GET_HEADER;
141
142    /**
143     * List of change log entries
144     */
145    private List<ChangeSet> entries = new ArrayList<ChangeSet>();
146
147    /**
148     * The current log entry being processed by the parser
149     */
150    private ChangeSet currentChange;
151
152    /**
153     * The current revision of the entry being processed by the parser
154     */
155    private String currentRevision;
156
157    /**
158     * The current comment of the entry being processed by the parser
159     */
160    private StringBuilder currentComment;
161
162    private String userDateFormat;
163
164    /**
165     * Default constructor.
166     */
167    public GitChangeLogConsumer( ScmLogger logger, String userDateFormat )
168    {
169        super( logger );
170
171        this.userDateFormat = userDateFormat;
172    }
173
174    public List<ChangeSet> getModifications()
175    {
176        // this is needed since the processFile does not always get a the end-sequence correctly.
177        processGetFile( "" );
178
179        return entries;
180    }
181
182    // ----------------------------------------------------------------------
183    // StreamConsumer Implementation
184    // ----------------------------------------------------------------------
185
186    /**
187     * {@inheritDoc}
188     */
189    public void consumeLine( String line )
190    {
191        switch ( status )
192        {
193            case STATUS_GET_HEADER:
194                processGetHeader( line );
195                break;
196            case STATUS_GET_AUTHOR:
197                processGetAuthor( line );
198                break;
199            case STATUS_GET_DATE:
200                processGetDate( line, null );
201                break;
202            case STATUS_GET_COMMENT:
203                processGetComment( line );
204                break;
205            case STATUS_GET_FILE:
206                processGetFile( line );
207                break;
208            case STATUS_RAW_TREE:
209                processGetRawTree( line );
210                break;
211            case STATUS_RAW_PARENT:
212                processGetRawParent( line );
213                break;
214            case STATUS_RAW_AUTHOR:
215                processGetRawAuthor( line );
216                break;
217            case STATUS_RAW_COMMITTER:
218                processGetRawCommitter( line );
219                break;
220            default:
221                throw new IllegalStateException( "Unknown state: " + status );
222        }
223    }
224
225    // ----------------------------------------------------------------------
226    //
227    // ----------------------------------------------------------------------
228
229    /**
230     * Process the current input line in the GET_HEADER state.  The
231     * author, date, and the revision of the entry are gathered.  Note,
232     * Git does not have per-file revisions, instead, the entire
233     * branch is given a single revision number, which is also used for
234     * the revision number of each file.
235     *
236     * @param line A line of text from the git log output
237     */
238    private void processGetHeader( String line )
239    {
240        Matcher matcher = HEADER_PATTERN.matcher( line );
241        if ( !matcher.matches() )
242        {
243            return;
244        }
245
246        currentRevision = matcher.group( 1 );
247
248        currentChange = new ChangeSet();
249
250        currentChange.setRevision( currentRevision );
251
252        status = STATUS_GET_AUTHOR;
253    }
254
255    /**
256     * Process the current input line in the STATUS_GET_AUTHOR state.  This
257     * state gathers all of the author information that are part of a log entry.
258     *
259     * @param line a line of text from the git log output
260     */
261    private void processGetAuthor( String line )
262    {
263        // this autodetects 'raw' format
264        if ( RAW_TREE_PATTERN.matcher( line ).matches() )
265        {
266            status = STATUS_RAW_TREE;
267            processGetRawTree( line );
268            return;
269        }
270
271        Matcher matcher = AUTHOR_PATTERN.matcher( line );
272        if ( !matcher.matches() )
273        {
274            return;
275        }
276        String author = matcher.group( 1 );
277
278        currentChange.setAuthor( author );
279
280        status = STATUS_GET_DATE;
281    }
282
283    /**
284     * Process the current input line in the STATUS_RAW_TREE state.  This
285     * state gathers tree hash part of a log entry.
286     *
287     * @param line a line of text from the git log output
288     */
289    private void processGetRawTree( String line )
290    {
291        if ( !RAW_TREE_PATTERN.matcher( line ).matches() )
292        {
293            return;
294        }
295        //here we could set treeHash if it appears in the model: currentChange.setTreeHash( matcher.group( 1 ) );
296        status = STATUS_RAW_PARENT;
297    }
298
299    /**
300     * Process the current input line in the STATUS_RAW_PARENT state.  This
301     * state gathers parent revisions of a log entry.
302     *
303     * @param line a line of text from the git log output
304     */
305    private void processGetRawParent( String line )
306    {
307        Matcher matcher = RAW_PARENT_PATTERN.matcher( line );
308        if ( !matcher.matches() )
309        {
310            status = STATUS_RAW_AUTHOR;
311            processGetRawAuthor( line );
312            return;
313        }
314        String parentHash = matcher.group( 1 );
315
316        addParentRevision( parentHash );
317    }
318
319    /**
320     * In git log, both parent and merged revisions are called parent. Fortunately, the real parent comes first in the
321     * log. This method takes care of the difference.
322     *
323     * @param hash -
324     */
325    private void addParentRevision( String hash )
326    {
327        if ( currentChange.getParentRevision() == null )
328        {
329            currentChange.setParentRevision( hash );
330        }
331        else
332        {
333            currentChange.addMergedRevision( hash );
334        }
335    }
336
337    /**
338     * Process the current input line in the STATUS_RAW_AUTHOR state.  This
339     * state gathers all the author information of a log entry.
340     *
341     * @param line a line of text from the git log output
342     */
343    private void processGetRawAuthor( String line )
344    {
345        Matcher matcher = RAW_AUTHOR_PATTERN.matcher( line );
346        if ( !matcher.matches() )
347        {
348            return;
349        }
350        String author = matcher.group( 1 );
351        currentChange.setAuthor( author );
352
353        String datestring = matcher.group( 2 );
354        String tz = matcher.group( 3 );
355
356        // with --format=raw option (which gets us to this methods), date is always in seconds since beginning of time
357        // even explicit --date=iso is ignored, so we ignore both userDateFormat and GIT_TIMESTAMP_PATTERN here
358        Calendar c = Calendar.getInstance( TimeZone.getTimeZone( tz ) );
359        c.setTimeInMillis( Long.parseLong( datestring ) * 1000 );
360        currentChange.setDate( c.getTime() );
361
362        status = STATUS_RAW_COMMITTER;
363    }
364
365    /**
366     * Process the current input line in the STATUS_RAW_AUTHOR state.  This
367     * state gathers all the committer information of a log entry.
368     *
369     * @param line a line of text from the git log output
370     */
371    private void processGetRawCommitter( String line )
372    {
373        if ( !RAW_COMMITTER_PATTERN.matcher( line ).matches() )
374        {
375            return;
376        }
377        // here we could set committer and committerDate, the same way as in processGetRawAuthor
378        status = STATUS_GET_COMMENT;
379    }
380
381    /**
382     * Process the current input line in the STATUS_GET_DATE state.  This
383     * state gathers all of the date information that are part of a log entry.
384     *
385     * @param line a line of text from the git log output
386     */
387    private void processGetDate( String line, Locale locale )
388    {
389        Matcher matcher = DATE_PATTERN.matcher( line );
390        if ( !matcher.matches() )
391        {
392            return;
393        }
394
395        String datestring = matcher.group( 1 );
396
397        Date date = parseDate( datestring.trim(), userDateFormat, GIT_TIMESTAMP_PATTERN, locale );
398
399        currentChange.setDate( date );
400
401        status = STATUS_GET_COMMENT;
402    }
403
404    /**
405     * Process the current input line in the GET_COMMENT state.  This
406     * state gathers all of the comments that are part of a log entry.
407     *
408     * @param line a line of text from the git log output
409     */
410    private void processGetComment( String line )
411    {
412        if ( line.length() < 4 )
413        {
414            if ( currentComment == null )
415            {
416                currentComment = new StringBuilder();
417            }
418            else
419            {
420                currentChange.setComment( currentComment.toString() );
421                status = STATUS_GET_FILE;
422            }
423        }
424        else
425        {
426            if ( currentComment.length() > 0 )
427            {
428                currentComment.append( '\n' );
429            }
430
431            currentComment.append( line.substring( 4 ) );
432        }
433    }
434
435    /**
436     * Process the current input line in the GET_FILE state.  This state
437     * adds each file entry line to the current change log entry.  Note,
438     * the revision number for the entire entry is used for the revision
439     * number of each file.
440     *
441     * @param line A line of text from the git log output
442     */
443    private void processGetFile( String line )
444    {
445        if ( line.length() == 0 )
446        {
447            if ( currentChange != null )
448            {
449                entries.add( currentChange );
450            }
451
452            resetChangeLog();
453
454            status = STATUS_GET_HEADER;
455        }
456        else
457        {
458            Matcher matcher = FILE_PATTERN.matcher( line );
459            if ( !matcher.matches() )
460            {
461                return;
462            }
463            final String actionChar = matcher.group( 1 );
464            // action is currently not used
465            final ScmFileStatus action;
466            String name = matcher.group( 2 );
467            String originalName = null;
468            String originalRevision = null;
469            if ( "A".equals( actionChar ) )
470            {
471                action = ScmFileStatus.ADDED;
472            }
473            else if ( "M".equals( actionChar ) )
474            {
475                action = ScmFileStatus.MODIFIED;
476            }
477            else if ( "D".equals( actionChar ) )
478            {
479                action = ScmFileStatus.DELETED;
480            }
481            else if ( "R".equals( actionChar ) )
482            {
483                action = ScmFileStatus.RENAMED;
484                originalName = name;
485                name = matcher.group( 4 );
486                originalRevision = currentChange.getParentRevision();
487            }
488            else if ( "C".equals( actionChar ) )
489            {
490                action = ScmFileStatus.COPIED;
491                originalName = name;
492                name = matcher.group( 4 );
493                originalRevision = currentChange.getParentRevision();
494            }
495            else
496            {
497                action = ScmFileStatus.UNKNOWN;
498            }
499
500            final ChangeFile changeFile = new ChangeFile( name, currentRevision );
501            changeFile.setAction( action );
502            changeFile.setOriginalName( originalName );
503            changeFile.setOriginalRevision( originalRevision );
504            currentChange.addFile( changeFile );
505        }
506    }
507
508    private void resetChangeLog()
509    {
510        currentComment = null;
511        currentChange = null;
512    }
513}