public abstract class AbstractCommonCrawlFormat extends Object implements CommonCrawlFormat
Abstract class that implements the CommonCrawlFormat interface.

Modifier and Type | Field and Description |
---|---|
protected Configuration | conf |
protected byte[] | content |
protected boolean | jsonArray |
protected String | keyPrefix |
protected Metadata | metadata |
protected boolean | reverseKey |
protected String | reverseKeyValue |
protected boolean | simpleDateFormat |
protected String | url |
Constructor and Description |
---|
AbstractCommonCrawlFormat(String url, byte[] content, Metadata metadata, Configuration nutchConf, CommonCrawlConfig config) |
protected String url
protected byte[] content
protected Metadata metadata
protected Configuration conf
protected String keyPrefix
protected boolean simpleDateFormat
protected boolean jsonArray
protected boolean reverseKey
protected String reverseKeyValue
public AbstractCommonCrawlFormat(String url, byte[] content, Metadata metadata, Configuration nutchConf, CommonCrawlConfig config) throws IOException
Throws: IOException
public String getJsonData() throws IOException
Specified by: getJsonData in interface CommonCrawlFormat
Throws: IOException
protected abstract void writeKeyValue(String key, String value) throws IOException
Throws: IOException
protected abstract void writeKeyNull(String key) throws IOException
Throws: IOException
protected abstract void startArray(String key, boolean nested, boolean newline) throws IOException
Throws: IOException
protected abstract void closeArray(String key, boolean nested, boolean newline) throws IOException
Throws: IOException
protected abstract void writeArrayValue(String value) throws IOException
Throws: IOException
protected abstract void startObject(String key) throws IOException
Throws: IOException
protected abstract void closeObject(String key) throws IOException
Throws: IOException
protected abstract String generateJson() throws IOException
Throws: IOException
protected String getUrl()
protected String getTimestamp()
protected String getMethod()
protected String getRequestHostName()
protected String getRequestHostAddress()
protected String getRequestSoftware()
protected String getRequestRobots()
protected String getRequestContactName()
protected String getRequestContactEmail()
protected String getRequestAccept()
protected String getRequestAcceptEncoding()
protected String getRequestAcceptLanguage()
protected String getRequestUserAgent()
protected String getResponseStatus()
protected String getResponseHostName()
protected String getResponseAddress()
protected String getResponseContentEncoding()
protected String getResponseContentType()
protected String getResponseDate()
protected String getResponseServer()
protected String getResponseContent()
protected String getKey()
protected String getImported()
Copyright © 2015 The Apache Software Foundation