/*
 * (c) Copyright 2009 Hewlett-Packard Development Company, LP
 * All rights reserved.
 * [See end of file]
 */

package org.openjena.riot.tokens;

import static com.hp.hpl.jena.sparql.util.Utils.equal ;
import static org.openjena.atlas.lib.Chars.CH_COMMA ;
import static org.openjena.atlas.lib.Chars.CH_DOT ;
import static org.openjena.atlas.lib.Chars.CH_LBRACE ;
import static org.openjena.atlas.lib.Chars.CH_LBRACKET ;
import static org.openjena.atlas.lib.Chars.CH_LPAREN ;
import static org.openjena.atlas.lib.Chars.CH_RBRACE ;
import static org.openjena.atlas.lib.Chars.CH_RBRACKET ;
import static org.openjena.atlas.lib.Chars.CH_RPAREN ;
import static org.openjena.atlas.lib.Chars.CH_SEMICOLON ;
import static org.openjena.riot.tokens.TokenType.BNODE ;
import static org.openjena.riot.tokens.TokenType.DECIMAL ;
import static org.openjena.riot.tokens.TokenType.DOUBLE ;
import static org.openjena.riot.tokens.TokenType.INTEGER ;
import static org.openjena.riot.tokens.TokenType.IRI ;
import static org.openjena.riot.tokens.TokenType.LITERAL_DT ;
import static org.openjena.riot.tokens.TokenType.LITERAL_LANG ;
import static org.openjena.riot.tokens.TokenType.STRING ;
import static org.openjena.riot.tokens.TokenType.VAR ;

import java.util.ArrayList ;
import java.util.List ;

import org.openjena.atlas.io.PeekReader ;
import org.openjena.atlas.iterator.Iter ;
import org.openjena.atlas.lib.Pair ;
import org.openjena.riot.PrefixMap ;
import org.openjena.riot.Prologue ;
import org.openjena.riot.RiotException ;

import com.hp.hpl.jena.datatypes.RDFDatatype ;
import com.hp.hpl.jena.datatypes.TypeMapper ;
import com.hp.hpl.jena.datatypes.xsd.XSDDatatype ;
import com.hp.hpl.jena.graph.Node ;
import com.hp.hpl.jena.rdf.model.AnonId ;
import com.hp.hpl.jena.sparql.util.FmtUtils ;
import com.hp.hpl.jena.sparql.util.Utils ;
import com.hp.hpl.jena.vocabulary.XSD ;

public final class Token
{
    private TokenType tokenType = null ;
    private String tokenImage = null ;
    private String tokenImage2 = null ;     // Used for language tag and second part of prefix name
    private Token subToken = null ;         // A related token (used for datatype literals)
    public int cntrlCode = 0 ;
    private long column ;
    private long line ;
    
    public final TokenType getType() { return tokenType ; }
    public final String getImage()   { return tokenImage ; }
    //public final String getImage1()  { return tokenImage1 ; }
    public final String getImage2()  { return tokenImage2 ; }
    public final int getCntrlCode()  { return cntrlCode ; }
    public final Token getSubToken() { return subToken ; }
    
    public final Token setType(TokenType tokenType)    { this.tokenType = tokenType ; return this ; }
    public final Token setImage(String tokenImage)     { this.tokenImage = tokenImage ; return this ; }
    //public final Token setImage1(String tokenImage1)   { this.tokenImage1 = tokenImage1 ; return this ; }
    public final Token setImage2(String tokenImage2)   { this.tokenImage2 = tokenImage2 ; return this ; }
    public final Token setCntrlCode(int cntrlCode)     { this.cntrlCode = cntrlCode ; return this ; }
    public final Token setSubToken(Token subToken)     { this.subToken = subToken ; return this ; }
    
    static Token create(String s)
    {
        PeekReader pr = PeekReader.readString(s) ;
        TokenizerText tt = new TokenizerText(pr) ;
        if ( ! tt.hasNext() )
            throw new RiotException("No token") ;
        Token t = tt.next() ;
        if ( tt.hasNext() )
            throw new RiotException("Extraneous charcaters") ;
        return t ;
    }

    static Iter<Token> createN(String s)
    {
        PeekReader pr = PeekReader.readString(s) ;
        TokenizerText tt = new TokenizerText(pr) ;
        List<Token> x = new ArrayList<Token>() ;
        while(tt.hasNext())
            x.add(tt.next()) ;
        return Iter.iter(x) ;
    }
    
    public long getColumn()
    {
        return column ;
    }

    public long getLine()
    {
        return line ;
    }
    
    private Token(TokenType type) { this(type, null, null, null) ; }
    
    private Token(TokenType type, String image1) { this(type, image1, null, null) ; }
    
    private Token(TokenType type, String image1, String image2)
    { this(type, image1, image2, null) ; }

    private Token(TokenType type, String image1, Token subToken)
    { this(type, image1, null, subToken) ; }


    private Token(TokenType type, String image1, String image2, Token subToken)
    {
        this() ;
        setType(type) ;
        setImage(image1) ;
        setImage2(image2) ;
        setSubToken(subToken) ;
    }

    
    private Token() { this(-1, -1) ; }
    
    public Token(long line, long column) { this.line = line ; this.column = column ; }
    
    public Token(Token token)
    { 
        this.tokenType = token.tokenType ;
        this.tokenImage = token.tokenImage ;
        this.tokenImage2 = token.tokenImage2 ;
        this.cntrlCode = token.cntrlCode ;
        this.line = token.line ; 
        this.column = token.column ;
    }
    
    public long asLong()
    {
        return asLong(-1) ;
    }
    
    public long asLong(long dft)
    {
        switch (tokenType)
        {
            case INTEGER:   return Long.valueOf(tokenImage) ;
            case HEX:       return Long.valueOf(tokenImage, 16) ;
             default:
                 return dft ;
        }
    }
    
    
    public String asWord()
    {
        if ( ! hasType(TokenType.KEYWORD) ) return null ;
        return getImage() ; 
    }
    
    public String text()
    {
        return toString(false) ;
        
    }
    
    @Override
    public String toString()
    {
        return toString(false) ;
    }
     
    static final String delim1 = "" ;
    static final String delim2 = "" ;
    public String toString(boolean addLocation)
    {
        StringBuilder sb = new StringBuilder() ;
        if ( addLocation && getLine() >= 0 && getColumn() >= 0 )
            sb.append(String.format("[%d,%d]", getLine(), getColumn())) ;
        sb.append("[") ;
        if ( getType() == null )
            sb.append("null") ;
        else
            sb.append(getType().toString()) ;
        
        if ( getImage() != null )
        {
            sb.append(":") ;
            sb.append(delim1) ;
            sb.append(getImage()) ;
            sb.append(delim1) ;
            
            if ( getImage2() != null )
            {
                sb.append(":") ;
                sb.append(delim2) ;
                sb.append(getImage2()) ;
                sb.append(delim2) ;
            }
            if ( getSubToken() != null )
            {
                sb.append(";") ;
                sb.append(delim2) ;
                sb.append(getSubToken().toString()) ;
                sb.append(delim2) ;
            }   

        }
        if ( getCntrlCode() != 0 )
        {
            sb.append(":") ; 
            sb.append(getCntrlCode()) ;
        }
        sb.append("]") ;
        return sb.toString() ;
    }
    
    public boolean isEOF() { return tokenType == TokenType.EOF ; }
    
    public boolean isCtlCode() { return tokenType == TokenType.CNTRL ; }

    public boolean isWord() { return tokenType == TokenType.KEYWORD ; }

    public boolean isString()
    {
        switch(tokenType)
        {
            case STRING:
            case STRING1:
            case STRING2:
            case LONG_STRING1:
            case LONG_STRING2:
                return true ;
            default:
                return false ;
        }
    }

    public boolean isNumber()
    {
        switch(tokenType)
        {
            case DECIMAL: 
            case DOUBLE:
            case INTEGER:
                return true ;
            default:
                return false ;
        }
    }
    
    public boolean isNode()
    {
        switch(tokenType)
        {
            case BNODE :
            case IRI : 
            case PREFIXED_NAME :
            case DECIMAL: 
            case DOUBLE:
            case INTEGER:
            case LITERAL_DT:
            case LITERAL_LANG:
            case STRING:
            case STRING1:
            case STRING2:
            case LONG_STRING1:
            case LONG_STRING2:
                return true ;
            default:
                return false ;
        }
    }
    
    // N-Triples but allows single quoted strings as well.
    public boolean isNodeBasic()
    {
        switch(tokenType)
        {
            case BNODE :
            case IRI : 
            case PREFIXED_NAME :
            case LITERAL_DT:
            case LITERAL_LANG:
            case STRING1:
            case STRING2:
                return true ;
            default:
                return false ;
        }
    }
    
    public boolean isBasicLiteral()
    {
        switch(tokenType)
        {
            case LITERAL_DT:
            case LITERAL_LANG:
            case STRING:
            case STRING1:
            case STRING2:
            case LONG_STRING1:
            case LONG_STRING2:
                return true ;
            default:
                return false ;
        }
    }
    
    public boolean isInteger()
    {
        return tokenType.equals(TokenType.INTEGER) ;
    }
    
    public boolean isIRI()
    {
        return tokenType.equals(TokenType.IRI) || tokenType.equals(TokenType.PREFIXED_NAME);
    }

    public boolean isBNode()
    {
        return tokenType.equals(TokenType.BNODE) ;
    }

    // Validation of URIs?
    
    /** Token to Node, a very direct form that is purely driven off the token.
     *  Turtle and N-triples need to process the token and not call this:
     *  1/ Ues bNode label as given
     *  2/ No prefix or URI resolution.
     */
    public Node asNode()
    {
        switch(tokenType)
        {
            // Assumes that bnode labes have been sorted out already.
            case BNODE : return Node.createAnon(new AnonId(tokenImage)) ;
            case IRI :   return Node.createURI(tokenImage) ; 
            case PREFIXED_NAME :
                return Node.createURI("urn:prefixed-name:"+tokenImage+":"+tokenImage2) ;
            case DECIMAL :  return Node.createLiteral(tokenImage, null, XSDDatatype.XSDdecimal)  ; 
            case DOUBLE :   return Node.createLiteral(tokenImage, null, XSDDatatype.XSDdouble)  ;
            case INTEGER:   return Node.createLiteral(tokenImage, null, XSDDatatype.XSDinteger) ;
            case LITERAL_DT :
            {
                Node n = getSubToken().asNode();
                if ( ! n.isURI() )
                    throw new RiotException("Invalid token: "+this) ;
                RDFDatatype dt = TypeMapper.getInstance().getSafeTypeByName(n.getURI()) ;
                return Node.createLiteral(tokenImage, null, dt)  ;
            }
            case LITERAL_LANG : return Node.createLiteral(tokenImage, tokenImage2, null)  ;
            case STRING:
            case STRING1:
            case STRING2:
            case LONG_STRING1:
            case LONG_STRING2:
                return Node.createLiteral(tokenImage) ;
            
            default: break ;
        }
        return null ;
    }
    
    public boolean hasType(TokenType tokenType)
    {
        return getType() == tokenType ;
    }
    
    @Override
    public int hashCode()
    {
        return Utils.hashCodeObject(tokenType) ^
                Utils.hashCodeObject(tokenImage) ^
                Utils.hashCodeObject(tokenImage2) ^
                Utils.hashCodeObject(cntrlCode) ;
    }
    
    @Override
    public boolean equals(Object other)
    {
        if ( ! ( other instanceof Token ) ) return false ;
        Token t = (Token)other ;
        return  equal(tokenType, t.tokenType) &&
                equal(tokenImage, t.tokenImage) &&
                equal(tokenImage2, t.tokenImage2) &&
                equal(cntrlCode, t.cntrlCode) ;
    }
    
    public static Token tokenForChar(char character)
    {
        switch(character)
        { 
            case CH_DOT:        return new Token(TokenType.DOT) ;
            case CH_SEMICOLON:  return new Token(TokenType.SEMICOLON) ;
            case CH_COMMA:      return new Token(TokenType.COMMA) ;
            case CH_LBRACE:     return new Token(TokenType.LBRACE) ;
            case CH_RBRACE:     return new Token(TokenType.RBRACE) ;
            case CH_LPAREN:     return new Token(TokenType.LPAREN) ;
            case CH_RPAREN:     return new Token(TokenType.RPAREN) ;
            case CH_LBRACKET:   return new Token(TokenType.LBRACKET) ;
            case CH_RBRACKET:   return new Token(TokenType.RBRACKET) ;
            default:
                throw new RuntimeException("Token error: unrecognized charcater: "+character) ;
        }
    }
    
    public static Token tokenForInteger(long value)
    {
        return new Token(TokenType.INTEGER, Long.toString(value)) ;
    }
    
    public static Token tokenForWord(String word)
    {
        return new Token(TokenType.KEYWORD, word) ; 
    }

    public static Token tokenForNode(Node n)
    {
        return tokenForNode(n, null, null) ;
    }

    public static Token tokenForNode(Node n, Prologue prologue)
    {
        return tokenForNode(n, prologue.getBaseURI(), prologue.getPrefixMap()) ;
    }

    public static Token tokenForNode(Node n, String base, PrefixMap mapping)
    {
            if ( n.isURI() )
            {
                String uri = n.getURI();
                if ( mapping != null )
                {
                    Pair<String,String> pname = mapping.abbrev(uri) ;
                    if ( pname != null )
                        return new Token(TokenType.PREFIXED_NAME, pname.getLeft(), pname.getRight()) ;
                }
                if ( base != null )
                {
                    String x = FmtUtils.abbrevByBase(uri, base) ;
                    if ( x != null ) 
                        return new Token(TokenType.IRI, x) ;
                }
                return new Token(IRI, n.getURI()) ;
            }
            if ( n.isBlank() )
                return new Token(BNODE, n.getBlankNodeLabel()) ;
            if ( n.isVariable() )
                return new Token(VAR, n.getName()) ;
            if ( n.isLiteral() )
            {
                String datatype = n.getLiteralDatatypeURI() ;
                String lang = n.getLiteralLanguage() ;
                String s = n.getLiteralLexicalForm() ;
                
                if ( datatype != null )
                {
                    // Special form we know how to handle?
                    // Assume valid text
                    if ( datatype.equals(XSD.integer.getURI()) )
                    {
                        try {
                            String s1 = s ;
                            // BigInteger does not allow leading +
                            // so chop it off before the format test
                            // BigDecimal does allow a leading +
                            if ( s.startsWith("+") )
                                s1 = s.substring(1) ;
                            new java.math.BigInteger(s1) ;
                            return new Token(INTEGER, s) ;
                        } catch (NumberFormatException nfe) {}
                        // No luck.  Continue.
                        // Continuing is always safe.
                    }
                    
                    if ( datatype.equals(XSD.decimal.getURI()) )
                    {
                        if ( s.indexOf('.') > 0 )
                        {
                            try {
                                // BigDecimal does allow a leading +
                                new java.math.BigDecimal(s) ;
                                return new Token(DECIMAL, s) ;
                            } catch (NumberFormatException nfe) {}
                            // No luck.  Continue.
                        }
                    }
                    
                    if ( datatype.equals(XSD.xdouble.getURI()) )
                    {
                        // Assumes SPARQL has decimals and doubles.
                        // Must have 'e' or 'E' to be a double short form.
    
                        if ( s.indexOf('e') >= 0 || s.indexOf('E') >= 0 )
                        {
                            try {
                                Double.parseDouble(s) ;
                                return new Token(DOUBLE, s) ; 
                            } catch (NumberFormatException nfe) {}
                            // No luck.  Continue.
                        }
                    }
    
    //                if ( datatype.equals(XSD.xboolean.getURI()) )
    //                {
    //                    if ( s.equalsIgnoreCase("true") ) return new Token(BOOLEAN, s) ;
    //                    if ( s.equalsIgnoreCase("false") ) return new Token(BOOLEAN, s) ;
    //                }
                    // Not a recognized form.
                    // Has datatype.
                    
                    Node dt = Node.createURI(datatype) ;
                    Token subToken = tokenForNode(dt) ;
                    return new Token(LITERAL_DT, s, subToken) ;
                }
    
                if ( lang != null && lang.length()>0)
                    return new Token(LITERAL_LANG, s, lang) ; 
                
                // Plain.
                return new Token(STRING, s) ; 
            }
            
            throw new IllegalArgumentException() ;
            
        }
}

/*
 * (c) Copyright 2009 Hewlett-Packard Development Company, LP
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */