001package org.apache.maven.scm.provider.git.gitexe.command.status;
002
003/*
004 * Licensed to the Apache Software Foundation (ASF) under one
005 * or more contributor license agreements.  See the NOTICE file
006 * distributed with this work for additional information
007 * regarding copyright ownership.  The ASF licenses this file
008 * to you under the Apache License, Version 2.0 (the
009 * "License"); you may not use this file except in compliance
010 * with the License.  You may obtain a copy of the License at
011 *
012 * http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing,
015 * software distributed under the License is distributed on an
016 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
017 * KIND, either express or implied.  See the License for the
018 * specific language governing permissions and limitations
019 * under the License.
020 */
021
022import java.io.File;
023import java.io.UnsupportedEncodingException;
024import java.net.URI;
025import java.net.URISyntaxException;
026import java.util.ArrayList;
027import java.util.List;
028import java.util.regex.Matcher;
029import java.util.regex.Pattern;
030
031import org.apache.commons.lang.StringUtils;
032import org.apache.maven.scm.ScmFile;
033import org.apache.maven.scm.ScmFileStatus;
034import org.apache.maven.scm.ScmFileSet;
035import org.apache.maven.scm.log.ScmLogger;
036import org.codehaus.plexus.util.cli.StreamConsumer;
037
038/**
039 * @author <a href="mailto:struberg@yahoo.de">Mark Struberg</a>
040 */
041public class GitStatusConsumer
042    implements StreamConsumer
043{
044
045    /**
046     * The pattern used to match added file lines
047     */
048    private static final Pattern ADDED_PATTERN = Pattern.compile( "^A[ M]* (.*)$" );
049
050    /**
051     * The pattern used to match modified file lines
052     */
053    private static final Pattern MODIFIED_PATTERN = Pattern.compile( "^ *M[ M]* (.*)$" );
054
055    /**
056     * The pattern used to match deleted file lines
057     */
058    private static final Pattern DELETED_PATTERN = Pattern.compile( "^ *D * (.*)$" );
059
060    /**
061     * The pattern used to match renamed file lines
062     */
063    private static final Pattern RENAMED_PATTERN = Pattern.compile( "^R  (.*) -> (.*)$" );
064
065    private ScmLogger logger;
066
067    private File workingDirectory;
068
069    private ScmFileSet scmFileSet;
070
071    /**
072     * Entries are relative to working directory, not to the repositoryroot
073     */
074    private List<ScmFile> changedFiles = new ArrayList<ScmFile>();
075
076    private URI relativeRepositoryPath;
077    
078    // ----------------------------------------------------------------------
079    //
080    // ----------------------------------------------------------------------
081
082    /**
083     * Consumer when workingDirectory and repositoryRootDirectory are the same
084     * 
085     * @param logger the logger
086     * @param workingDirectory the working directory
087     */
088    public GitStatusConsumer( ScmLogger logger, File workingDirectory )
089    {
090        this.logger = logger;
091        this.workingDirectory = workingDirectory;
092    }
093
094    /**
095     * Assuming that you have to discover the repositoryRoot, this is how you can get the
096     * <code>relativeRepositoryPath</code>
097     * <pre>
098     * URI.create( repositoryRoot ).relativize( fileSet.getBasedir().toURI() )
099     * </pre>
100     * 
101     * @param logger the logger
102     * @param workingDirectory the working directory
103     * @param relativeRepositoryPath the working directory relative to the repository root
104     * @since 1.9
105     * @see GitStatusCommand#createRevparseShowPrefix(ScmFileSet)
106     */
107    public GitStatusConsumer( ScmLogger logger, File workingDirectory, URI relativeRepositoryPath )
108    {
109        this( logger, workingDirectory );
110        this.relativeRepositoryPath = relativeRepositoryPath;
111    }
112
113    /**
114     * Assuming that you have to discover the repositoryRoot, this is how you can get the
115     * <code>relativeRepositoryPath</code>
116     * <pre>
117     * URI.create( repositoryRoot ).relativize( fileSet.getBasedir().toURI() )
118     * </pre>
119     *
120     * @param logger the logger
121     * @param workingDirectory the working directory
122     * @param scmFileSet fileset with includes and excludes
123     * @since 1.11.0
124     * @see GitStatusCommand#createRevparseShowPrefix(ScmFileSet)
125     */
126    public GitStatusConsumer( ScmLogger logger, File workingDirectory, ScmFileSet scmFileSet )
127    {
128        this( logger, workingDirectory );
129        this.scmFileSet = scmFileSet;
130    }
131
132    /**
133     * Assuming that you have to discover the repositoryRoot, this is how you can get the
134     * <code>relativeRepositoryPath</code>
135     * <pre>
136     * URI.create( repositoryRoot ).relativize( fileSet.getBasedir().toURI() )
137     * </pre>
138     *
139     * @param logger the logger
140     * @param workingDirectory the working directory
141     * @param relativeRepositoryPath the working directory relative to the repository root
142     * @param scmFileSet fileset with includes and excludes
143     * @since 1.11.0
144     * @see GitStatusCommand#createRevparseShowPrefix(ScmFileSet)
145     */
146    public GitStatusConsumer( ScmLogger logger, File workingDirectory, URI relativeRepositoryPath,
147                              ScmFileSet scmFileSet )
148    {
149        this( logger, workingDirectory, scmFileSet );
150        this.relativeRepositoryPath = relativeRepositoryPath;
151    }
152
153    // ----------------------------------------------------------------------
154    // StreamConsumer Implementation
155    // ----------------------------------------------------------------------
156
157    /**
158     * {@inheritDoc}
159     */
160    public void consumeLine( String line )
161    {
162        if ( logger.isDebugEnabled() )
163        {
164            logger.debug( line );
165        }
166        if ( StringUtils.isEmpty( line ) )
167        {
168            return;
169        }
170
171        ScmFileStatus status = null;
172
173        List<String> files = new ArrayList<String>();
174        
175        Matcher matcher;
176        if ( ( matcher = ADDED_PATTERN.matcher( line ) ).find() )
177        {
178            status = ScmFileStatus.ADDED;
179            files.add( resolvePath( matcher.group( 1 ), relativeRepositoryPath ) );
180        }
181        else if ( ( matcher = MODIFIED_PATTERN.matcher( line ) ).find() )
182        {
183            status = ScmFileStatus.MODIFIED;
184            files.add( resolvePath( matcher.group( 1 ), relativeRepositoryPath ) );
185        }
186        else if ( ( matcher = DELETED_PATTERN.matcher( line ) ).find() )
187        {
188            status = ScmFileStatus.DELETED;
189            files.add( resolvePath( matcher.group( 1 ), relativeRepositoryPath ) );
190        }
191        else if ( ( matcher = RENAMED_PATTERN.matcher( line ) ).find() )
192        {
193            status = ScmFileStatus.RENAMED;
194            files.add( resolvePath( matcher.group( 1 ), relativeRepositoryPath ) );
195            files.add( resolvePath( matcher.group( 2 ), relativeRepositoryPath ) );
196            logger.debug( "RENAMED status for line '" + line + "' files added '" + matcher.group( 1 ) + "' '"
197                              + matcher.group( 2 ) );
198        }
199        else
200        {
201            logger.warn( "Ignoring unrecognized line: " + line );
202            return;
203        }
204
205        // If the file isn't a file; don't add it.
206        if ( !files.isEmpty() )
207        {
208            if ( workingDirectory != null )
209            {
210                if ( status == ScmFileStatus.RENAMED )
211                {
212                    String oldFilePath = files.get( 0 );
213                    String newFilePath = files.get( 1 );
214                    if ( isFile( oldFilePath ) )
215                    {
216                        logger.debug( "file '" + oldFilePath + "' is a file" );
217                        return;
218                    }
219                    else
220                    {
221                        logger.debug( "file '" + oldFilePath + "' not a file" );
222                    }
223                    if ( !isFile( newFilePath ) )
224                    {
225                        logger.debug( "file '" + newFilePath + "' not a file" );
226                        return;
227                    }
228                    else
229                    {
230                        logger.debug( "file '" + newFilePath + "' is a file" );
231                    }
232                }
233                else if ( status == ScmFileStatus.DELETED )
234                {
235                    if ( isFile( files.get( 0 ) ) )
236                    {
237                        return;
238                    }
239                }
240                else
241                {
242                    if ( !isFile( files.get( 0 ) ) )
243                    {
244                        return;
245                    }
246                }
247            }
248
249            for ( String file : files )
250            {
251                if ( this.scmFileSet != null && !isFileNameInFileList( this.scmFileSet.getFileList(), file ) )
252                {
253                    // skip adding this file
254                }
255                else
256                {
257                    changedFiles.add( new ScmFile( file, status ) );
258                }
259            }
260        }
261    }
262
263    private boolean isFileNameInFileList( List<File> fileList, String fileName )
264    {
265        if ( relativeRepositoryPath == null )
266        {
267          return fileList.contains( new File( fileName ) );
268        }
269        else
270        {
271            for ( File f : fileList )
272            {
273                File file = new File( relativeRepositoryPath.getPath(), fileName );
274                if ( file.getPath().endsWith( f.getName() ) )
275                {
276                    return true;
277                }
278            }
279            return fileList.isEmpty();
280        }
281
282    }
283
284    private boolean isFile( String file )
285    {
286        File targetFile = new File( workingDirectory, file );
287        return targetFile.isFile();
288    }
289
290    protected static String resolvePath( String fileEntry, URI path )
291    {
292        /* Quotes may be included (from the git status line) when an fileEntry includes spaces */
293        String cleanedEntry = stripQuotes( fileEntry );
294        if ( path != null )
295        {
296            return resolveURI( cleanedEntry, path ).getPath();
297        }
298        else
299        {
300            return cleanedEntry;
301        }
302    }
303
304    /**
305     * 
306     * @param fileEntry the fileEntry, must not be {@code null}
307     * @param path the path, must not be {@code null}
308     * @return TODO
309     */
310    public static URI resolveURI( String fileEntry, URI path )
311    {
312        // When using URI.create, spaces need to be escaped but not the slashes, so we can't use
313        // URLEncoder.encode( String, String )
314        // new File( String ).toURI() results in an absolute URI while path is relative, so that can't be used either.
315        return path.relativize( uriFromPath( stripQuotes ( fileEntry ) ) );
316    }
317
318    /**
319     * Create an URI whose getPath() returns the given path and getScheme() returns null. The path may contain spaces,
320     * colons, and other special characters.
321     * 
322     * @param path the path.
323     * @return the new URI
324     */
325    public static URI uriFromPath( String path )
326    {
327        try
328        {
329            if ( path != null && path.indexOf( ':' ) != -1 )
330            {
331                // prefixing the path so the part preceding the colon does not become the scheme
332                String tmp = new URI( null, null, "/x" + path, null ).toString().substring( 2 );
333                // the colon is not escaped by default
334                return new URI( tmp.replace( ":", "%3A" ) );
335            }
336            else
337            {
338                return new URI( null, null, path, null );
339            }
340        }
341        catch ( URISyntaxException x )
342        {
343            throw new IllegalArgumentException( x.getMessage(), x );
344        }
345    }
346
347    public List<ScmFile> getChangedFiles()
348    {
349        return changedFiles;
350    }
351
352    /**
353     * @param str the (potentially quoted) string, must not be {@code null}
354     * @return the string with a pair of double quotes removed (if they existed)
355     */
356    private static String stripQuotes( String str )
357    {
358        int strLen = str.length();
359        return ( strLen > 0 && str.startsWith( "\"" ) && str.endsWith( "\"" ) )
360                        ? unescape( str.substring( 1, strLen - 1 ) )
361                        : str;
362    }
363    
364    /**
365     * Dequote a quoted string generated by git status --porcelain.
366     * The leading and trailing quotes have already been removed. 
367     * @param fileEntry
368     * @return TODO
369     */
370    private static String unescape( String fileEntry )
371    {
372        // If there are no escaped characters, just return the input argument
373        int pos = fileEntry.indexOf( '\\' );
374        if ( pos == -1 )
375        {
376            return fileEntry;
377        }
378        
379        // We have escaped characters
380        byte[] inba = fileEntry.getBytes();
381        int inSub = 0;      // Input subscript into fileEntry
382        byte[] outba = new byte[fileEntry.length()];
383        int outSub = 0;     // Output subscript into outba
384        
385        while ( true )
386        {
387            System.arraycopy( inba,  inSub,  outba, outSub, pos - inSub );
388            outSub += pos - inSub;
389            inSub = pos + 1;
390            switch ( (char) inba[inSub++] )
391            {
392                case '"':
393                    outba[outSub++] = '"';
394                    break;
395                    
396                case 'a':
397                    outba[outSub++] = 7;        // Bell
398                    break;
399                    
400                case 'b':
401                    outba[outSub++] = '\b';
402                    break;
403                    
404                case 't':
405                    outba[outSub++] = '\t';
406                    break;
407                    
408                case 'n':
409                    outba[outSub++] = '\n';
410                    break;
411                    
412                case 'v':
413                    outba[outSub++] = 11;       // Vertical tab
414                    break;
415                    
416                case 'f':
417                    outba[outSub++] = '\f';
418                    break;
419                    
420                case 'r':
421                    outba[outSub++] = '\f';
422                    break;
423                    
424                case '\\':
425                    outba[outSub++] = '\\';
426                    break;
427                    
428                case '0':
429                case '1':
430                case '2':
431                case '3':
432                    // This assumes that the octal escape here is valid.
433                    byte b = (byte) ( ( inba[inSub - 1] - '0' ) << 6 );
434                    b |= (byte) ( ( inba[inSub++] - '0' ) << 3 );
435                    b |= (byte) ( inba[inSub++] - '0' );
436                    outba[outSub++] = b;
437                    break;
438                    
439                default:
440                    //This is an invalid escape in a string.  Just copy it.
441                    outba[outSub++] = '\\';
442                    inSub--;
443                    break;
444            }
445            pos = fileEntry.indexOf( '\\', inSub );
446            if ( pos == -1 )        // No more backslashes; we're done
447            {
448                System.arraycopy( inba, inSub, outba, outSub, inba.length - inSub );
449                outSub += inba.length - inSub;
450                break;
451            }
452        }
453        try
454        {
455            // explicit say UTF-8, otherwise it'll fail at least on Windows cmdline
456            return new String( outba, 0, outSub, "UTF-8" );
457        }
458        catch ( UnsupportedEncodingException e )
459        {
460          throw new RuntimeException( e );    
461        }
462    }
463}