ChunkWriter

package org.apache.maven.index.reader;

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import java.io.Closeable;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Date;
import java.util.Iterator;
import java.util.Map;
import java.util.zip.GZIPOutputStream;

/**
 * Maven 2 Index published binary chunk writer, it writes raw Maven Indexer records to the transport binary format.
 * <p>
 * Everything is written through a GZIP stream layered over the supplied output stream. The layout produced is:
 * <ul>
 * <li>header: one byte holding the version, then an eight byte timestamp in milliseconds ({@code -1} when no
 * timestamp was given);</li>
 * <li>per record: a four byte field count followed by that many fields;</li>
 * <li>per field: one flag byte, the field name as written by {@link DataOutput#writeUTF(String)}, then the field
 * value as a four byte length followed by its modified UTF-8 bytes.</li>
 * </ul>
 *
 * @since 5.1.2
 */
public class ChunkWriter
    implements Closeable
{
    private static final int F_INDEXED = 1;

    private static final int F_TOKENIZED = 2;

    private static final int F_STORED = 4;

    /** Buffer size, in bytes, handed to the GZIP stream. */
    private static final int GZIP_BUFFER_SIZE = 2 * 1024;

    private final String chunkName;

    private final DataOutputStream dataOutputStream;

    private final int version;

    /** Private copy of the timestamp given at construction, or {@code null}; never handed out directly. */
    private final Date timestamp;

    /**
     * Creates the writer and immediately writes the chunk header (version byte and timestamp) to the stream.
     *
     * @param chunkName    the chunk name, stored trimmed; must not be {@code null}
     * @param outputStream the stream to wrap; it gets closed when this writer is closed
     * @param version      the index version; only its low eight bits end up in the header
     * @param timestamp    the index timestamp, may be {@code null} in which case {@code -1} is written; the instance
     *                     is copied, so later changes to it do not affect this writer
     * @throws IOException          if the GZIP stream cannot be set up or the header cannot be written
     * @throws NullPointerException if {@code chunkName} is {@code null}
     */
    public ChunkWriter( final String chunkName, final OutputStream outputStream, final int version,
                        final Date timestamp )
        throws IOException
    {
        this.chunkName = chunkName.trim();
        this.dataOutputStream = new DataOutputStream( new GZIPOutputStream( outputStream, GZIP_BUFFER_SIZE ) );
        this.version = version;
        // Date is mutable: keep a private copy so the value reported by getTimestamp() always matches the header
        this.timestamp = timestamp == null ? null : new Date( timestamp.getTime() );

        dataOutputStream.writeByte( version );
        dataOutputStream.writeLong( this.timestamp == null ? -1 : this.timestamp.getTime() );
    }

    /**
     * Returns the chunk name.
     */
    public String getName()
    {
        return chunkName;
    }

    /**
     * Returns index version. All releases so far always returned {@code 1}.
     */
    public int getVersion()
    {
        return version;
    }

    /**
     * Returns the index timestamp of last update of the index.
     *
     * @return a fresh copy of the timestamp on every call, or {@code null} if none was given at construction
     */
    public Date getTimestamp()
    {
        return timestamp == null ? null : new Date( timestamp.getTime() );
    }

    /**
     * Writes out the record iterator and returns the written record count.
     *
     * @param iterator the records to write, each one a map of field name to field value
     * @return the number of records taken from the iterator and written
     * @throws IOException if writing to the underlying stream fails
     */
    public int writeChunk( final Iterator<Map<String, String>> iterator )
        throws IOException
    {
        int written = 0;
        while ( iterator.hasNext() )
        {
            writeRecord( iterator.next(), dataOutputStream );
            written++;
        }
        return written;
    }

    /**
     * Closes this writer and its underlying output.
     *
     * @throws IOException if closing the underlying stream fails
     */
    @Override
    public void close()
        throws IOException
    {
        dataOutputStream.close();
    }

    /**
     * Writes one record: the field count as an int, then every entry of the map as a field.
     */
    private static void writeRecord( final Map<String, String> record, final DataOutput dataOutput )
        throws IOException
    {
        dataOutput.writeInt( record.size() );
        for ( Map.Entry<String, String> entry : record.entrySet() )
        {
            writeField( entry.getKey(), entry.getValue(), dataOutput );
        }
    }

    /**
     * Writes one field: flag byte, field name, field value.
     * <p>
     * The flags are derived from the field name alone: every field is flagged as stored, fields {@code "i"} and
     * {@code "m"} are not flagged as indexed, and fields {@code "i"}, {@code "m"}, {@code "1"} and {@code "px"}
     * are not flagged as tokenized.
     */
    private static void writeField( final String fieldName, final String fieldValue, final DataOutput dataOutput )
        throws IOException
    {
        final boolean notIndexed = fieldName.equals( "i" ) || fieldName.equals( "m" );
        final boolean notTokenized = notIndexed || fieldName.equals( "1" ) || fieldName.equals( "px" );

        int flags = F_STORED;
        if ( !notIndexed )
        {
            flags |= F_INDEXED;
        }
        if ( !notTokenized )
        {
            flags |= F_TOKENIZED;
        }

        dataOutput.writeByte( flags );
        dataOutput.writeUTF( fieldName );
        writeUTF( fieldValue, dataOutput );
    }

    /**
     * Writes a string as a four byte length followed by its modified UTF-8 bytes.
     * <p>
     * The byte encoding is the one used by {@link DataOutput#writeUTF(String)} (each {@code char} is encoded on its
     * own and {@code U+0000} takes two bytes), but the length prefix is an int instead of an unsigned short.
     */
    private static void writeUTF( final String str, final DataOutput dataOutput )
        throws IOException
    {
        final int strlen = str.length();
        final int utflen = modifiedUtf8Length( str );
        dataOutput.writeInt( utflen );

        final byte[] bytearr = new byte[utflen];
        int count = 0;
        for ( int i = 0; i < strlen; i++ )
        {
            final int c = str.charAt( i );
            if ( ( c >= 0x0001 ) && ( c <= 0x007F ) )
            {
                bytearr[count++] = (byte) c;
            }
            else if ( c > 0x07FF )
            {
                bytearr[count++] = (byte) ( 0xE0 | ( ( c >> 12 ) & 0x0F ) );
                bytearr[count++] = (byte) ( 0x80 | ( ( c >> 6 ) & 0x3F ) );
                bytearr[count++] = (byte) ( 0x80 | ( c & 0x3F ) );
            }
            else
            {
                // covers U+0000 and U+0080..U+07FF
                bytearr[count++] = (byte) ( 0xC0 | ( ( c >> 6 ) & 0x1F ) );
                bytearr[count++] = (byte) ( 0x80 | ( c & 0x3F ) );
            }
        }
        dataOutput.write( bytearr, 0, utflen );
    }

    /**
     * Returns how many bytes {@link #writeUTF(String, DataOutput)} emits for the given string, length prefix
     * excluded.
     */
    private static int modifiedUtf8Length( final String str )
    {
        final int strlen = str.length();
        int utflen = 0;
        for ( int i = 0; i < strlen; i++ )
        {
            final int c = str.charAt( i );
            if ( ( c >= 0x0001 ) && ( c <= 0x007F ) )
            {
                utflen++;
            }
            else if ( c > 0x07FF )
            {
                utflen += 3;
            }
            else
            {
                utflen += 2;
            }
        }
        return utflen;
    }
}