Serialized Form
-
Package org.apache.tika.batch
-
Class org.apache.tika.batch.BatchNoRestartError extends Error implements Serializable
-
-
Package org.apache.tika.config
-
Package org.apache.tika.detect
-
Class org.apache.tika.detect.CompositeDetector extends Object implements Serializable
- serialVersionUID:
- 5980683158436430252L
-
Serialized Fields
-
detectors
List<Detector> detectors
-
registry
MediaTypeRegistry registry
-
-
Class org.apache.tika.detect.CompositeEncodingDetector extends Object implements Serializable
- serialVersionUID:
- 5980683158436430252L
-
Serialized Fields
-
detectors
List<EncodingDetector> detectors
-
-
Class org.apache.tika.detect.DefaultDetector extends CompositeDetector implements Serializable
- serialVersionUID:
- -8170114575326908027L
-
Class org.apache.tika.detect.DefaultEncodingDetector extends CompositeEncodingDetector implements Serializable
-
Class org.apache.tika.detect.DefaultProbDetector extends CompositeDetector implements Serializable
- serialVersionUID:
- -8836240060532323352L
-
Class org.apache.tika.detect.EmptyDetector extends Object implements Serializable
-
Class org.apache.tika.detect.MagicDetector extends Object implements Serializable
-
Serialized Fields
-
isRegex
boolean isRegex
True if pattern is a regular expression, false otherwise. -
isStringIgnoreCase
boolean isStringIgnoreCase
True if we're doing a case-insensitive string match, false otherwise. -
length
int length
Length of the comparison window. -
mask
byte[] mask
Bit mask that is applied to the source bytes before pattern matching. -
offsetRangeBegin
int offsetRangeBegin
First offset (inclusive) of the comparison window within the document input stream. Greater than or equal to zero. -
offsetRangeEnd
int offsetRangeEnd
Last offset (inclusive) of the comparison window within the document input stream. Greater than or equal to the first offset.
Note that this is not the offset of the last byte read from the document stream. Instead, the last window of bytes to be compared starts at this offset.
-
pattern
byte[] pattern
The magic match pattern. If this byte pattern is equal to the possibly bit-masked bytes from the input stream, then the type detection succeeds and the configured MagicDetector.type
is returned. -
patternLength
int patternLength
Length of the pattern, which in the case of regular expressions will not be the same as the comparison window length. -
type
MediaType type
The matching media type. Returned by the MagicDetector.detect(InputStream, Metadata)
method if a match is found.
-
-
-
Class org.apache.tika.detect.NameDetector extends Object implements Serializable
-
Class org.apache.tika.detect.NNExampleModelDetector extends TrainedModelDetector implements Serializable
- serialVersionUID:
- 1L
-
Class org.apache.tika.detect.NonDetectingEncodingDetector extends Object implements Serializable
-
Serialized Fields
-
charset
Charset charset
-
-
-
Class org.apache.tika.detect.OverrideDetector extends Object implements Serializable
-
Class org.apache.tika.detect.TextDetector extends Object implements Serializable
- serialVersionUID:
- 4774601079503507765L
-
Serialized Fields
-
bytesToTest
int bytesToTest
-
-
Class org.apache.tika.detect.TrainedModelDetector extends Object implements Serializable
- serialVersionUID:
- 1L
-
Serialized Fields
-
MODEL_MAP
Map<MediaType,TrainedModel> MODEL_MAP
-
-
Class org.apache.tika.detect.TypeDetector extends Object implements Serializable
-
Class org.apache.tika.detect.ZeroSizeFileDetector extends Object implements Serializable
-
-
Package org.apache.tika.embedder
-
Class org.apache.tika.embedder.ExternalEmbedder extends Object implements Serializable
- serialVersionUID:
- -2828829275642475697L
-
Serialized Fields
-
command
String[] command
The external command to invoke. - See Also:
Runtime.exec(String[])
-
commandAppendOperator
String commandAppendOperator
-
commandAssignmentDelimeter
String commandAssignmentDelimeter
-
commandAssignmentOperator
String commandAssignmentOperator
-
metadataCommandArguments
Map<Property,String[]> metadataCommandArguments
Mapping of Tika metadata to command line parameters. -
quoteAssignmentValues
boolean quoteAssignmentValues
-
supportedEmbedTypes
Set<MediaType> supportedEmbedTypes
Media types supported by the external program. -
tmp
TemporaryResources tmp
-
-
-
Package org.apache.tika.eval.io
-
Class org.apache.tika.eval.io.ExtractReaderException extends IOException implements Serializable
-
Serialized Fields
-
type
ExtractReaderException.TYPE type
-
-
-
-
Package org.apache.tika.example
-
Class org.apache.tika.example.DirListParser extends Object implements Serializable
- serialVersionUID:
- 2717930544410610735L
-
Class org.apache.tika.example.EncryptedPrescriptionDetector extends Object implements Serializable
- serialVersionUID:
- -1709652690773421147L
-
Class org.apache.tika.example.EncryptedPrescriptionParser extends AbstractParser implements Serializable
- serialVersionUID:
- -7816987249611278541L
-
Class org.apache.tika.example.LanguageDetectingParser extends DelegatingParser implements Serializable
- serialVersionUID:
- 4291320409396502774L
-
Class org.apache.tika.example.PrescriptionParser extends XMLParser implements Serializable
- serialVersionUID:
- 7690682277511967388L
-
-
Package org.apache.tika.exception
-
Class org.apache.tika.exception.AccessPermissionException extends TikaException implements Serializable
-
Class org.apache.tika.exception.CorruptedFileException extends TikaException implements Serializable
-
Class org.apache.tika.exception.EncryptedDocumentException extends TikaException implements Serializable
-
Class org.apache.tika.exception.TikaConfigException extends TikaException implements Serializable
-
Class org.apache.tika.exception.TikaException extends Exception implements Serializable
-
Class org.apache.tika.exception.TikaMemoryLimitException extends TikaException implements Serializable
-
Class org.apache.tika.exception.UnsupportedFormatException extends TikaException implements Serializable
-
Class org.apache.tika.exception.ZeroByteFileException extends TikaException implements Serializable
-
-
Package org.apache.tika.extractor
-
Class org.apache.tika.extractor.EmbeddedDocumentUtil extends Object implements Serializable
-
Serialized Fields
-
context
ParseContext context
-
detector
Detector detector
-
embeddedDocumentExtractor
EmbeddedDocumentExtractor embeddedDocumentExtractor
-
mimeTypes
MimeTypes mimeTypes
-
tikaConfig
TikaConfig tikaConfig
-
-
-
Class org.apache.tika.extractor.ParserContainerExtractor extends Object implements Serializable
- serialVersionUID:
- 2261131045580861514L
-
-
Package org.apache.tika.fork
-
Class org.apache.tika.fork.ForkParser extends AbstractParser implements Serializable
- serialVersionUID:
- -4962742892274663950L
-
Serialized Fields
-
currentlyInUse
int currentlyInUse
-
java
List<String> java
Java command line -
loader
ClassLoader loader
-
maxFilesProcessedPerClient
int maxFilesProcessedPerClient
-
parser
Parser parser
-
parserFactoryFactory
ParserFactoryFactory parserFactoryFactory
-
pool
Queue<org.apache.tika.fork.ForkClient> pool
-
poolSize
int poolSize
Process pool size -
serverParseTimeoutMillis
long serverParseTimeoutMillis
-
serverPulseMillis
long serverPulseMillis
-
serverWaitTimeoutMillis
long serverWaitTimeoutMillis
-
tikaBin
Path tikaBin
-
-
Class org.apache.tika.fork.ParserFactoryFactory extends Object implements Serializable
- serialVersionUID:
- 4710974869988895410L
-
-
Package org.apache.tika.gui
-
Class org.apache.tika.gui.TikaGUI extends JFrame implements Serializable
- serialVersionUID:
- 5883906936187059495L
-
Serialized Fields
-
cards
JPanel cards
Container for the editor cards. -
chooser
JFileChooser chooser
File chooser. -
context
ParseContext context
Parsing context. -
html
JEditorPane html
Formatted XHTML output. -
imageParser
org.apache.tika.gui.TikaGUI.ImageSavingParser imageParser
Captures requested embedded images -
json
JEditorPane json
Raw JSON source. -
layout
CardLayout layout
The card layout for switching between different views. -
metadata
JEditorPane metadata
Document metadata. -
parser
Parser parser
Configured parser instance. -
text
JEditorPane text
Plain text output. -
textMain
JEditorPane textMain
Main content output. -
xml
JEditorPane xml
Raw XHTML source.
-
-
-
Package org.apache.tika.io
-
Class org.apache.tika.io.EndianUtils.BufferUnderrunException extends TikaException implements Serializable
- serialVersionUID:
- 8358288231138076276L
-
Class org.apache.tika.io.IOExceptionWithCause extends IOException implements Serializable
- serialVersionUID:
- 1L
-
Class org.apache.tika.io.TaggedIOException extends IOExceptionWithCause implements Serializable
-
Serialized Fields
-
tag
Object tag
The object reference used to tag the exception.
-
-
-
-
Package org.apache.tika.metadata
-
Class org.apache.tika.metadata.Metadata extends Object implements Serializable
- serialVersionUID:
- 5623926545693153182L
-
Class org.apache.tika.metadata.PropertyTypeException extends IllegalArgumentException implements Serializable
-
-
Package org.apache.tika.mime
-
Class org.apache.tika.mime.MediaType extends Object implements Serializable
- serialVersionUID:
- -3831000556189036392L
-
Serialized Fields
-
parameters
Map<String,String> parameters
Immutable sorted map of media type parameters. -
semicolon
int semicolon
Location of the first ";" character separating the type part of MediaType.string
from possible parameters. Length of MediaType.string
in case there are no parameters. -
slash
int slash
Location of the "/" character separating the type and the subtype tokens in MediaType.string. -
string
String string
Canonical string representation of this media type.
-
-
Class org.apache.tika.mime.MediaTypeRegistry extends Object implements Serializable
- serialVersionUID:
- 4710974869988895410L
-
Serialized Fields
-
inheritance
Map<MediaType,MediaType> inheritance
Known type inheritance relationships. The mapping is from a media type to the closest supertype. -
registry
Map<MediaType,MediaType> registry
Registry of known media types, including type aliases. A canonical media type is handled as an identity mapping, while an alias is stored as a mapping from the alias to the corresponding canonical type.
-
-
Class org.apache.tika.mime.MimeType extends Object implements Serializable
- serialVersionUID:
- 4357830439860729201L
-
Serialized Fields
-
acronym
String acronym
The MimeType acronym -
description
String description
Description of this media type. -
extensions
List<String> extensions
All known file extensions of this type, in order of preference (best first). -
isInterpreted
boolean isInterpreted
Whether this mime-type is used for server-side scripts, and thus cannot reliably be used for filename-based type detection -
links
List<URI> links
Documentation Links -
magics
List<org.apache.tika.mime.Magic> magics
The magics associated to this Mime-Type -
minLength
int minLength
The minimum length of data to provide for magic analysis -
rootXML
List<org.apache.tika.mime.MimeType.RootXML> rootXML
The root-XML associated to this Mime-Type -
type
MediaType type
The normalized media type name. -
uti
String uti
The Uniform Type Identifier (see http://en.wikipedia.org/wiki/Uniform_Type_Identifier)
-
-
Class org.apache.tika.mime.MimeTypeException extends TikaException implements Serializable
-
Class org.apache.tika.mime.MimeTypes extends Object implements Serializable
- serialVersionUID:
- -1350863170146349036L
-
Serialized Fields
-
htmlMimeType
MimeType htmlMimeType
html type, text/html -
magics
List<org.apache.tika.mime.Magic> magics
Sorted list of all registered magics -
patterns
org.apache.tika.mime.Patterns patterns
The patterns matcher -
registry
MediaTypeRegistry registry
Registered media types and their aliases. -
rootMimeType
MimeType rootMimeType
Root type, application/octet-stream. -
rootMimeTypeL
List<MimeType> rootMimeTypeL
-
textMimeType
MimeType textMimeType
Text type, text/plain. -
types
Map<MediaType,MimeType> types
All the registered MimeTypes indexed on their canonical names -
xmlMimeType
MimeType xmlMimeType
xml type, application/xml -
xmls
List<MimeType> xmls
Sorted list of all registered rootXML
-
-
Class org.apache.tika.mime.ProbabilisticMimeDetectionSelector extends Object implements Serializable
- serialVersionUID:
- 224589862960269260L
-
Serialized Fields
-
changeRate
float changeRate
-
extension_neg
float extension_neg
-
extension_trust
float extension_trust
-
magic_neg
float magic_neg
-
magic_trust
float magic_trust
-
meta_neg
float meta_neg
-
meta_trust
float meta_trust
-
mimeTypes
MimeTypes mimeTypes
-
priorExtensionFileType
float priorExtensionFileType
-
priorMagicFileType
float priorMagicFileType
-
priorMetaFileType
float priorMetaFileType
-
rootMediaType
MediaType rootMediaType
-
threshold
float threshold
-
-
-
Package org.apache.tika.parser
-
Class org.apache.tika.parser.AbstractEncodingDetectorParser extends AbstractParser implements Serializable
-
Serialized Fields
-
encodingDetector
EncodingDetector encodingDetector
-
-
-
Class org.apache.tika.parser.AbstractParser extends Object implements Serializable
- serialVersionUID:
- 7186985395903074255L
-
Class org.apache.tika.parser.AutoDetectParser extends CompositeParser implements Serializable
- serialVersionUID:
- 6110455808615143122L
-
Serialized Fields
-
detector
Detector detector
The type detector used by this parser to auto-detect the type of a document.
-
-
Class org.apache.tika.parser.CompositeParser extends AbstractParser implements Serializable
- serialVersionUID:
- 2192845797749627824L
-
Serialized Fields
-
fallback
Parser fallback
The fallback parser, used when no better parser is available. -
parsers
List<Parser> parsers
List of component parsers. -
registry
MediaTypeRegistry registry
Media type registry.
-
-
Class org.apache.tika.parser.CryptoParser extends DelegatingParser implements Serializable
- serialVersionUID:
- -3507995752666557731L
-
Class org.apache.tika.parser.DefaultParser extends CompositeParser implements Serializable
- serialVersionUID:
- 3612324825403757520L
-
Class org.apache.tika.parser.DelegatingParser extends AbstractParser implements Serializable
-
Class org.apache.tika.parser.DigestingParser extends ParserDecorator implements Serializable
-
Serialized Fields
-
digester
DigestingParser.Digester digester
-
-
-
Class org.apache.tika.parser.EmptyParser extends AbstractParser implements Serializable
- serialVersionUID:
- -4218649699095732123L
-
Class org.apache.tika.parser.ErrorParser extends AbstractParser implements Serializable
- serialVersionUID:
- 7727423956957641824L
-
Class org.apache.tika.parser.NetworkParser extends AbstractParser implements Serializable
-
Class org.apache.tika.parser.ParseContext extends Object implements Serializable
- serialVersionUID:
- -5921436862145826534L
-
Class org.apache.tika.parser.ParserDecorator extends AbstractParser implements Serializable
- serialVersionUID:
- -3861669115439125268L
-
Serialized Fields
-
parser
Parser parser
The decorated parser instance.
-
-
Class org.apache.tika.parser.ParserPostProcessor extends ParserDecorator implements Serializable
-
Class org.apache.tika.parser.RecursiveParserWrapper extends ParserDecorator implements Serializable
- serialVersionUID:
- 9086536568120690938L
-
Serialized Fields
-
catchEmbeddedExceptions
boolean catchEmbeddedExceptions
-
contentHandlerFactory
ContentHandlerFactory contentHandlerFactory
Deprecated. This should be passed in via the RecursiveParserWrapperHandler
-
lastParseState
org.apache.tika.parser.RecursiveParserWrapper.ParserState lastParseState
Deprecated. This is here only for legacy behavior; it will be removed in 2.0 and/or 1.20 -
maxEmbeddedResources
int maxEmbeddedResources
Deprecated. This is here only for legacy behavior; it will be removed in 2.0 and/or 1.20. Set this on the RecursiveParserWrapperHandler instead
-
-
-
Package org.apache.tika.parser.apple
-
Class org.apache.tika.parser.apple.AppleSingleFileParser extends AbstractParser implements Serializable
-
-
Package org.apache.tika.parser.asm
-
Class org.apache.tika.parser.asm.ClassParser extends AbstractParser implements Serializable
- serialVersionUID:
- -3531388963354454357L
-
-
Package org.apache.tika.parser.audio
-
Class org.apache.tika.parser.audio.AudioParser extends AbstractParser implements Serializable
- serialVersionUID:
- -6015684081240882695L
-
Class org.apache.tika.parser.audio.MidiParser extends AbstractParser implements Serializable
- serialVersionUID:
- 6343278584336189432L
-
-
Package org.apache.tika.parser.chm
-
Class org.apache.tika.parser.chm.ChmParser extends AbstractParser implements Serializable
- serialVersionUID:
- 5938777307516469802L
-
-
Package org.apache.tika.parser.chm.accessor
-
Class org.apache.tika.parser.chm.accessor.ChmItsfHeader extends Object implements Serializable
- serialVersionUID:
- 2215291838533213826L
-
Serialized Fields
-
currentPlace
int currentPlace
-
data_offset
long data_offset
-
dataRemained
int dataRemained
-
dir_len
long dir_len
-
dir_offset
long dir_offset
-
dir_uuid
byte[] dir_uuid
-
header_len
int header_len
-
lang_id
long lang_id
-
last_modified
long last_modified
-
signature
byte[] signature
-
stream_uuid
byte[] stream_uuid
-
unknown_000c
int unknown_000c
-
unknown_len
long unknown_len
-
unknown_offset
long unknown_offset
-
version
int version
-
-
Class org.apache.tika.parser.chm.accessor.ChmItspHeader extends Object implements Serializable
- serialVersionUID:
- 1962394421998181341L
-
Serialized Fields
-
block_len
long block_len
-
blockidx_intvl
int blockidx_intvl
-
currentPlace
int currentPlace
-
dataRemained
int dataRemained
-
header_len
int header_len
-
index_depth
int index_depth
-
index_head
int index_head
-
index_root
int index_root
-
lang_id
long lang_id
-
num_blocks
long num_blocks
-
signature
byte[] signature
-
system_uuid
byte[] system_uuid
-
unknown_000c
int unknown_000c
-
unknown_0024
int unknown_0024
-
unknown_002c
int unknown_002c
-
unknown_0044
byte[] unknown_0044
-
version
int version
-
-
Class org.apache.tika.parser.chm.accessor.ChmLzxcControlData extends Object implements Serializable
- serialVersionUID:
- -7897854774939631565L
-
Serialized Fields
-
currentPlace
int currentPlace
-
dataRemained
int dataRemained
-
resetInterval
long resetInterval
-
signature
byte[] signature
-
size
long size
-
unknown_18
long unknown_18
-
version
long version
-
windowSize
long windowSize
-
windowsPerReset
long windowsPerReset
-
-
Class org.apache.tika.parser.chm.accessor.ChmLzxcResetTable extends Object implements Serializable
- serialVersionUID:
- -8209574429411707460L
-
Serialized Fields
-
block_address
long[] block_address
-
block_count
long block_count
-
block_len
long block_len
-
compressed_len
long compressed_len
-
currentPlace
int currentPlace
-
dataRemained
int dataRemained
-
table_offset
long table_offset
-
uncompressed_len
long uncompressed_len
-
unknown
long unknown
-
version
long version
-
-
Class org.apache.tika.parser.chm.accessor.ChmPmgiHeader extends Object implements Serializable
- serialVersionUID:
- -2092282339894303701L
-
Serialized Fields
-
currentPlace
int currentPlace
-
dataRemained
int dataRemained
-
free_space
long free_space
-
signature
byte[] signature
-
-
Class org.apache.tika.parser.chm.accessor.ChmPmglHeader extends Object implements Serializable
- serialVersionUID:
- -6139486487475923593L
-
Serialized Fields
-
block_next
int block_next
-
block_prev
int block_prev
-
currentPlace
int currentPlace
-
dataRemained
int dataRemained
-
free_space
long free_space
-
signature
byte[] signature
-
unknown_0008
long unknown_0008
-
-
-
Package org.apache.tika.parser.chm.exception
-
Class org.apache.tika.parser.chm.exception.ChmParsingException extends TikaException implements Serializable
- serialVersionUID:
- 6497936044733665210L
-
-
Package org.apache.tika.parser.code
-
Class org.apache.tika.parser.code.SourceCodeParser extends AbstractEncodingDetectorParser implements Serializable
- serialVersionUID:
- -4543476498190054160L
-
-
Package org.apache.tika.parser.crypto
-
Class org.apache.tika.parser.crypto.Pkcs7Parser extends AbstractParser implements Serializable
- serialVersionUID:
- -7310531559075115044L
-
Class org.apache.tika.parser.crypto.TSDParser extends AbstractParser implements Serializable
- serialVersionUID:
- 3268158344501763323L
-
-
Package org.apache.tika.parser.csv
-
Class org.apache.tika.parser.csv.TextAndCSVParser extends AbstractEncodingDetectorParser implements Serializable
-
Serialized Fields
-
delimiters
char[] delimiters
-
markLimit
int markLimit
This is the mark limit in characters (not bytes) to read from the stream when classifying the stream as csv, tsv or txt. -
minConfidence
double minConfidence
minimum confidence score that there's enough evidence to determine csv/tsv vs. txt
-
-
-
-
Package org.apache.tika.parser.ctakes
-
Class org.apache.tika.parser.ctakes.CTAKESConfig extends Object implements Serializable
- serialVersionUID:
- -1599741171775528923L
-
Serialized Fields
-
aeDescriptorPath
String aeDescriptorPath
-
annotationProps
CTAKESAnnotationProperty[] annotationProps
-
metadata
String[] metadata
-
prettyPrint
boolean prettyPrint
-
separatorChar
char separatorChar
-
serialize
boolean serialize
-
serializerType
CTAKESSerializer serializerType
-
stream
OutputStream stream
-
text
boolean text
-
UMLSPass
String UMLSPass
-
UMLSUser
String UMLSUser
-
-
Class org.apache.tika.parser.ctakes.CTAKESParser extends ParserDecorator implements Serializable
- serialVersionUID:
- -2313482748027097961L
-
-
Package org.apache.tika.parser.dbf
-
Class org.apache.tika.parser.dbf.DBFParser extends AbstractParser implements Serializable
-
-
Package org.apache.tika.parser.dif
-
Class org.apache.tika.parser.dif.DIFParser extends AbstractParser implements Serializable
- serialVersionUID:
- 971505521275777826L
-
-
Package org.apache.tika.parser.dwg
-
Class org.apache.tika.parser.dwg.DWGParser extends AbstractParser implements Serializable
- serialVersionUID:
- -7744232583079169119L
-
-
Package org.apache.tika.parser.envi
-
Class org.apache.tika.parser.envi.EnviHeaderParser extends AbstractEncodingDetectorParser implements Serializable
- serialVersionUID:
- -1479368523072408091L
-
-
Package org.apache.tika.parser.epub
-
Class org.apache.tika.parser.epub.EpubContentParser extends AbstractParser implements Serializable
-
Class org.apache.tika.parser.epub.EpubParser extends AbstractParser implements Serializable
- serialVersionUID:
- 215176772484050550L
-
-
Package org.apache.tika.parser.executable
-
Class org.apache.tika.parser.executable.ExecutableParser extends AbstractParser implements Serializable
- serialVersionUID:
- 32128791892482L
-
-
Package org.apache.tika.parser.external
-
Class org.apache.tika.parser.external.CompositeExternalParser extends CompositeParser implements Serializable
- serialVersionUID:
- 6962436916649024024L
-
Class org.apache.tika.parser.external.ExternalParser extends AbstractParser implements Serializable
- serialVersionUID:
- -1079128990650687037L
-
Serialized Fields
-
command
String[] command
The external command to invoke. - See Also:
Runtime.exec(String[])
-
ignoredLineConsumer
ExternalParser.LineConsumer ignoredLineConsumer
A consumer for ignored Lines -
metadataPatterns
Map<Pattern,String> metadataPatterns
Regular Expressions to run over STDOUT to extract Metadata. -
supportedTypes
Set<MediaType> supportedTypes
Media types supported by the external program.
-
-
-
Package org.apache.tika.parser.feed
-
Class org.apache.tika.parser.feed.FeedParser extends AbstractParser implements Serializable
- serialVersionUID:
- -3785361933034525186L
-
-
Package org.apache.tika.parser.font
-
Class org.apache.tika.parser.font.AdobeFontMetricParser extends AbstractParser implements Serializable
- serialVersionUID:
- -4820306522217196835L
-
Class org.apache.tika.parser.font.TrueTypeParser extends AbstractParser implements Serializable
- serialVersionUID:
- 44788554612243032L
-
-
Package org.apache.tika.parser.gdal
-
Class org.apache.tika.parser.gdal.GDALParser extends AbstractParser implements Serializable
- serialVersionUID:
- -3869130527323941401L
-
Serialized Fields
-
command
String command
-
-
-
Package org.apache.tika.parser.geo.topic
-
Class org.apache.tika.parser.geo.topic.GeoParser extends AbstractParser implements Serializable
- serialVersionUID:
- -2241391757440215491L
-
Serialized Fields
-
available
boolean available
-
config
GeoParserConfig config
-
gazetteerClient
GeoGazetteerClient gazetteerClient
-
initialized
boolean initialized
-
modelUrl
URL modelUrl
-
nameFinder
opennlp.tools.namefind.NameFinderME nameFinder
-
-
Class org.apache.tika.parser.geo.topic.GeoParserConfig extends Object implements Serializable
- serialVersionUID:
- -3167692634278575818L
-
-
Package org.apache.tika.parser.geo.topic.gazetteer
-
Class org.apache.tika.parser.geo.topic.gazetteer.Location extends Object implements Serializable
- serialVersionUID:
- -59485448766406004L
-
-
Package org.apache.tika.parser.geoinfo
-
Class org.apache.tika.parser.geoinfo.GeographicInformationParser extends AbstractParser implements Serializable
-
-
Package org.apache.tika.parser.grib
-
Class org.apache.tika.parser.grib.GribParser extends AbstractParser implements Serializable
- serialVersionUID:
- 7855458954474247655L
-
-
Package org.apache.tika.parser.hdf
-
Class org.apache.tika.parser.hdf.HDFParser extends AbstractParser implements Serializable
- serialVersionUID:
- 1091208208003437549L
-
-
Package org.apache.tika.parser.html
-
Class org.apache.tika.parser.html.HtmlEncodingDetector extends Object implements Serializable
-
Serialized Fields
-
markLimit
int markLimit
-
-
-
Class org.apache.tika.parser.html.HtmlParser extends AbstractEncodingDetectorParser implements Serializable
- serialVersionUID:
- 7895315240498733128L
-
Serialized Fields
-
extractScripts
boolean extractScripts
-
-
-
Package org.apache.tika.parser.html.charsetdetector
-
Class org.apache.tika.parser.html.charsetdetector.StandardHtmlEncodingDetector extends Object implements Serializable
-
Serialized Fields
-
markLimit
int markLimit
-
-
-
-
Package org.apache.tika.parser.hwp
-
Class org.apache.tika.parser.hwp.HwpTextExtractorV5 extends Object implements Serializable
- serialVersionUID:
- 1L
-
Class org.apache.tika.parser.hwp.HwpV5Parser extends AbstractParser implements Serializable
- serialVersionUID:
- 1L
-
Serialized Fields
-
extractor
HwpTextExtractorV5 extractor
-
-
-
Package org.apache.tika.parser.image
-
Class org.apache.tika.parser.image.BPGParser extends AbstractParser implements Serializable
- serialVersionUID:
- -161736541253892772L
-
Class org.apache.tika.parser.image.ICNSParser extends AbstractParser implements Serializable
- serialVersionUID:
- 922010233654248327L
-
Class org.apache.tika.parser.image.ImageParser extends AbstractParser implements Serializable
- serialVersionUID:
- 7852529269245520335L
-
Class org.apache.tika.parser.image.PSDParser extends AbstractParser implements Serializable
- serialVersionUID:
- 883387734607994914L
-
Class org.apache.tika.parser.image.TiffParser extends AbstractParser implements Serializable
- serialVersionUID:
- -3941143576535464926L
-
Class org.apache.tika.parser.image.WebPParser extends AbstractParser implements Serializable
- serialVersionUID:
- -3941143576535464926L
-
-
Package org.apache.tika.parser.iptc
-
Class org.apache.tika.parser.iptc.IptcAnpaParser extends Object implements Serializable
- serialVersionUID:
- -6062820170212879115L
-
Serialized Fields
-
FMT_ANPA_1312
int FMT_ANPA_1312
-
FMT_ANPA_UPI
int FMT_ANPA_UPI
-
FMT_ANPA_UPI_DL
int FMT_ANPA_UPI_DL
-
FMT_IPTC_7901
int FMT_IPTC_7901
-
FMT_IPTC_AP
int FMT_IPTC_AP
-
FMT_IPTC_BLM
int FMT_IPTC_BLM
-
FMT_IPTC_CHAR
int FMT_IPTC_CHAR
-
FMT_IPTC_NYT
int FMT_IPTC_NYT
-
FMT_IPTC_PHOTO
int FMT_IPTC_PHOTO
-
FMT_IPTC_RTR
int FMT_IPTC_RTR
-
FMT_NITF
int FMT_NITF
-
FMT_NITF_RB
int FMT_NITF_RB
-
FMT_NITF_TT
int FMT_NITF_TT
-
FORMAT
int FORMAT
-
-
-
Package org.apache.tika.parser.isatab
-
Package org.apache.tika.parser.iwork
-
Class org.apache.tika.parser.iwork.IWorkPackageParser extends AbstractParser implements Serializable
- serialVersionUID:
- -2160322853809682372L
-
-
Package org.apache.tika.parser.iwork.iwana
-
Class org.apache.tika.parser.iwork.iwana.IWork13PackageParser extends AbstractParser implements Serializable
-
-
Package org.apache.tika.parser.jdbc
-
Class org.apache.tika.parser.jdbc.SQLite3Parser extends AbstractParser implements Serializable
- serialVersionUID:
- -752276948656079347L
-
-
Package org.apache.tika.parser.journal
-
Class org.apache.tika.parser.journal.JournalParser extends AbstractParser implements Serializable
- serialVersionUID:
- 4664255544154296438L
-
-
Package org.apache.tika.parser.jpeg
-
Class org.apache.tika.parser.jpeg.JpegParser extends AbstractParser implements Serializable
- serialVersionUID:
- -1355028253756234603L
-
-
Package org.apache.tika.parser.mail
-
Class org.apache.tika.parser.mail.RFC822Parser extends AbstractParser implements Serializable
- serialVersionUID:
- -5504243905998074168L
-
Serialized Fields
-
detector
Detector detector
-
extractAllAlternatives
boolean extractAllAlternatives
-
-
-
Package org.apache.tika.parser.mat
-
Class org.apache.tika.parser.mat.MatParser extends AbstractParser implements Serializable
-
-
Package org.apache.tika.parser.mbox
-
Class org.apache.tika.parser.mbox.MboxParser extends AbstractParser implements Serializable
- serialVersionUID:
- -1762689436731160661L
-
Class org.apache.tika.parser.mbox.OutlookPSTParser extends AbstractParser implements Serializable
- serialVersionUID:
- 620998217748364063L
-
-
Package org.apache.tika.parser.microsoft
-
Class org.apache.tika.parser.microsoft.AbstractOfficeParser extends AbstractParser implements Serializable
-
Serialized Fields
-
defaultOfficeParserConfig
OfficeParserConfig defaultOfficeParserConfig
-
-
-
Class org.apache.tika.parser.microsoft.EMFParser extends AbstractParser implements Serializable
-
Class org.apache.tika.parser.microsoft.JackcessParser extends AbstractParser implements Serializable
- serialVersionUID:
- -752276948656079347L
-
Serialized Fields
-
locale
Locale locale
-
-
Class org.apache.tika.parser.microsoft.MSOwnerFileParser extends AbstractParser implements Serializable
- serialVersionUID:
- -752276948656079347L
-
Class org.apache.tika.parser.microsoft.OfficeParser extends AbstractOfficeParser implements Serializable
- serialVersionUID:
- 7393462244028653479L
-
Class org.apache.tika.parser.microsoft.OfficeParserConfig extends Object implements Serializable
-
Serialized Fields
-
concatenatePhoneticRuns
boolean concatenatePhoneticRuns
-
extractAllAlternativesFromMSG
boolean extractAllAlternativesFromMSG
-
extractMacros
boolean extractMacros
-
includeDeletedContent
boolean includeDeletedContent
-
includeHeadersAndFooters
boolean includeHeadersAndFooters
-
includeMissingRows
boolean includeMissingRows
-
includeMoveFromContent
boolean includeMoveFromContent
-
includeShapeBasedContent
boolean includeShapeBasedContent
-
includeSlideMasterContent
boolean includeSlideMasterContent
-
includeSlideNotes
boolean includeSlideNotes
-
useSAXDocxExtractor
boolean useSAXDocxExtractor
-
useSAXPptxExtractor
boolean useSAXPptxExtractor
-
-
-
Class org.apache.tika.parser.microsoft.OldExcelParser extends AbstractParser implements Serializable
- serialVersionUID:
- 4611820730372823452L
-
Class org.apache.tika.parser.microsoft.POIFSContainerDetector extends Object implements Serializable
- serialVersionUID:
- -3028021741663605293L
-
Serialized Fields
-
markLimit
int markLimit
-
-
Class org.apache.tika.parser.microsoft.TikaExcelGeneralFormat extends Format implements Serializable
- serialVersionUID:
- 1L
-
Serialized Fields
-
decimalFormat
DecimalFormat decimalFormat
-
decimalSymbols
DecimalFormatSymbols decimalSymbols
-
integerFormat
DecimalFormat integerFormat
-
scientificFormat
DecimalFormat scientificFormat
-
-
Class org.apache.tika.parser.microsoft.TNEFParser extends AbstractParser implements Serializable
- serialVersionUID:
- 4611820730372823452L
-
Class org.apache.tika.parser.microsoft.WMFParser extends AbstractParser implements Serializable
-
-
Package org.apache.tika.parser.microsoft.ooxml
-
Class org.apache.tika.parser.microsoft.ooxml.OOXMLParser extends AbstractOfficeParser implements Serializable
- serialVersionUID:
- 6535995710857776481L
-
-
Package org.apache.tika.parser.microsoft.ooxml.xwpf.ml2006
-
Class org.apache.tika.parser.microsoft.ooxml.xwpf.ml2006.Word2006MLParser extends AbstractOfficeParser implements Serializable
-
-
Package org.apache.tika.parser.microsoft.xml
-
Class org.apache.tika.parser.microsoft.xml.AbstractXML2003Parser extends AbstractParser implements Serializable
-
Class org.apache.tika.parser.microsoft.xml.SpreadsheetMLParser extends AbstractXML2003Parser implements Serializable
-
Class org.apache.tika.parser.microsoft.xml.WordMLParser extends AbstractXML2003Parser implements Serializable
-
-
Package org.apache.tika.parser.mp3
-
Class org.apache.tika.parser.mp3.Mp3Parser extends AbstractParser implements Serializable
- serialVersionUID:
- 8537074922934844370L
-
-
Package org.apache.tika.parser.mp4
-
Class org.apache.tika.parser.mp4.MP4Parser extends AbstractParser implements Serializable
- serialVersionUID:
- 84011216792285L
-
Serialized Fields
-
iso6709Extractor
org.apache.tika.parser.mp4.ISO6709Extractor iso6709Extractor
-
-
-
Package org.apache.tika.parser.ner
-
Class org.apache.tika.parser.ner.NamedEntityParser extends AbstractParser implements Serializable
-
Serialized Fields
-
available
boolean available
-
initialized
boolean initialized
-
nerChain
List<NERecogniser> nerChain
-
secondaryParser
Tika secondaryParser
-
-
-
-
Package org.apache.tika.parser.netcdf
-
Class org.apache.tika.parser.netcdf.NetCDFParser extends AbstractParser implements Serializable
- serialVersionUID:
- -5940938274907708665L
-
-
Package org.apache.tika.parser.ocr
-
Class org.apache.tika.parser.ocr.TesseractOCRConfig extends Object implements Serializable
- serialVersionUID:
- -4861942486845757891L
-
Serialized Fields
-
applyRotation
boolean applyRotation
-
colorspace
String colorspace
-
density
int density
-
depth
int depth
-
enableImageProcessing
int enableImageProcessing
-
filter
String filter
-
imageMagickPath
String imageMagickPath
-
language
String language
-
maxFileSizeToOcr
long maxFileSizeToOcr
-
minFileSizeToOcr
long minFileSizeToOcr
-
otherTesseractConfig
Map<String,String> otherTesseractConfig
-
outputType
TesseractOCRConfig.OUTPUT_TYPE outputType
-
pageSegMode
String pageSegMode
-
pageSeparator
String pageSeparator
-
preserveInterwordSpacing
boolean preserveInterwordSpacing
-
resize
int resize
-
tessdataPath
String tessdataPath
-
tesseractPath
String tesseractPath
-
timeout
int timeout
-
-
Class org.apache.tika.parser.ocr.TesseractOCRParser extends AbstractParser implements Serializable
- serialVersionUID:
- -8167538283213097265L
-
Serialized Fields
-
defaultConfig
TesseractOCRConfig defaultConfig
-
-
-
Package org.apache.tika.parser.odf
-
Class org.apache.tika.parser.odf.OpenDocumentContentParser extends AbstractParser implements Serializable
-
Class org.apache.tika.parser.odf.OpenDocumentMetaParser extends XMLParser implements Serializable
- serialVersionUID:
- -8739250869531737584L
-
Class org.apache.tika.parser.odf.OpenDocumentParser extends AbstractParser implements Serializable
- serialVersionUID:
- -6410276875438618287L
-
-
Package org.apache.tika.parser.opendocument
-
Class org.apache.tika.parser.opendocument.OpenOfficeParser extends OpenDocumentParser implements Serializable
-
-
Package org.apache.tika.parser.pdf
-
Class org.apache.tika.parser.pdf.AccessChecker extends Object implements Serializable
- serialVersionUID:
- 6492570218190936986L
-
Serialized Fields
-
allowAccessibility
boolean allowAccessibility
-
needToCheck
boolean needToCheck
-
-
Class org.apache.tika.parser.pdf.PDFParser extends AbstractParser implements Serializable
- serialVersionUID:
- -752276948656079347L
-
Serialized Fields
-
defaultConfig
PDFParserConfig defaultConfig
-
initializableProblemHandler
InitializableProblemHandler initializableProblemHandler
-
-
Class org.apache.tika.parser.pdf.PDFParserConfig extends Object implements Serializable
- serialVersionUID:
- 6492570218190936986L
-
Serialized Fields
-
accessChecker
AccessChecker accessChecker
-
averageCharTolerance
Float averageCharTolerance
-
catchIntermediateIOExceptions
boolean catchIntermediateIOExceptions
-
detectAngles
boolean detectAngles
-
enableAutoSpace
boolean enableAutoSpace
-
extractAcroFormContent
boolean extractAcroFormContent
-
extractActions
boolean extractActions
-
extractAnnotationText
boolean extractAnnotationText
-
extractBookmarksText
boolean extractBookmarksText
-
extractFontNames
boolean extractFontNames
-
extractInlineImages
boolean extractInlineImages
-
extractUniqueInlineImagesOnly
boolean extractUniqueInlineImagesOnly
-
ifXFAExtractOnlyXFA
boolean ifXFAExtractOnlyXFA
-
maxMainMemoryBytes
long maxMainMemoryBytes
-
ocrDPI
int ocrDPI
-
ocrImageFormatName
String ocrImageFormatName
-
ocrImageQuality
float ocrImageQuality
-
ocrImageScale
float ocrImageScale
Deprecated: use ocrDPI instead. -
ocrImageType
org.apache.pdfbox.rendering.ImageType ocrImageType
-
ocrStrategy
PDFParserConfig.OCR_STRATEGY ocrStrategy
-
setKCMS
boolean setKCMS
-
sortByPosition
boolean sortByPosition
-
spacingTolerance
Float spacingTolerance
-
suppressDuplicateOverlappingText
boolean suppressDuplicateOverlappingText
-
-
-
Package org.apache.tika.parser.pkg
-
Class org.apache.tika.parser.pkg.CompressorParser extends AbstractParser implements Serializable
- serialVersionUID:
- 2793565792967222459L
-
Serialized Fields
-
memoryLimitInKb
int memoryLimitInKb
-
-
Class org.apache.tika.parser.pkg.PackageParser extends AbstractParser implements Serializable
- serialVersionUID:
- -5331043266963888708L
-
Class org.apache.tika.parser.pkg.RarParser extends AbstractParser implements Serializable
- serialVersionUID:
- 6157727985054451501L
-
Class org.apache.tika.parser.pkg.StreamingZipContainerDetector extends org.apache.tika.parser.pkg.ZipContainerDetectorBase implements Serializable
-
Class org.apache.tika.parser.pkg.ZipContainerDetector extends Object implements Serializable
- serialVersionUID:
- 2891763938430295453L
-
Serialized Fields
-
markLimit
int markLimit
-
streamingZipContainerDetector
StreamingZipContainerDetector streamingZipContainerDetector
-
-
-
Package org.apache.tika.parser.pot
-
Class org.apache.tika.parser.pot.PooledTimeSeriesParser extends AbstractParser implements Serializable
- serialVersionUID:
- -2855917932512164988L
-
-
Package org.apache.tika.parser.prt
-
Class org.apache.tika.parser.prt.PRTParser extends AbstractParser implements Serializable
- serialVersionUID:
- 4659638314375035178L
-
-
Package org.apache.tika.parser.recognition
-
Class org.apache.tika.parser.recognition.AgeRecogniser extends AbstractParser implements Serializable
- serialVersionUID:
- 1108439049093046832L
-
Serialized Fields
-
config
AgeRecogniserConfig config
-
secondaryParser
Tika secondaryParser
-
-
Class org.apache.tika.parser.recognition.ObjectRecognitionParser extends AbstractParser implements Serializable
-
Serialized Fields
-
recogniser
ObjectRecogniser recogniser
-
-
-
-
Package org.apache.tika.parser.recognition.tf
-
Class org.apache.tika.parser.recognition.tf.TensorflowImageRecParser extends ExternalParser implements Serializable
-
-
Package org.apache.tika.parser.rtf
-
Class org.apache.tika.parser.rtf.RTFParser extends AbstractParser implements Serializable
- serialVersionUID:
- -4165069489372320313L
-
Serialized Fields
-
ignoreListMarkup
boolean ignoreListMarkup
-
memoryLimitInKb
int memoryLimitInKb
-
-
-
Package org.apache.tika.parser.sas
-
Class org.apache.tika.parser.sas.SAS7BDATParser extends AbstractParser implements Serializable
- serialVersionUID:
- -2775485539937983150L
-
-
Package org.apache.tika.parser.sentiment
-
Class org.apache.tika.parser.sentiment.SentimentAnalysisParser extends AbstractParser implements Serializable
-
Serialized Fields
-
classifier
opennlp.tools.sentiment.SentimentME classifier
-
modelPath
String modelPath
Path to the model. Default is "https://raw.githubusercontent.com/USCDataScience/SentimentAnalysisParser/master/sentiment-models/en-netflix-sentiment.bin"
The path can be one of the following:
- an HTTP or HTTPS URL (not recommended for production use, since no caching is implemented)
- an absolute or relative path on the local file system (recommended for production use in standalone mode)
- a relative path known to the class loader (especially useful in distributed environments; recommended for advanced users)
-
-
-
-
Package org.apache.tika.parser.strings
-
Class org.apache.tika.parser.strings.FileConfig extends Object implements Serializable
- serialVersionUID:
- 5712655467296441314L
-
Serialized Fields
-
filePath
String filePath
-
mimetype
boolean mimetype
-
-
Class org.apache.tika.parser.strings.Latin1StringsParser extends AbstractParser implements Serializable
- serialVersionUID:
- 1L
-
Serialized Fields
-
inPos
int inPos
The position into the input buffer. -
input
byte[] input
The input buffer. -
inSize
int inSize
The number of bytes in the input buffer. -
minSize
int minSize
The minimum size of a character sequence to be extracted. -
outPos
int outPos
The current position into the output buffer. -
output
byte[] output
The output buffer. -
tmpPos
int tmpPos
The temporary position into the output buffer. -
xhtml
XHTMLContentHandler xhtml
The output content handler.
-
-
Class org.apache.tika.parser.strings.StringsConfig extends Object implements Serializable
- serialVersionUID:
- -1465227101645003594L
-
Serialized Fields
-
encoding
StringsEncoding encoding
-
minLength
int minLength
-
stringsPath
String stringsPath
-
timeout
int timeout
-
-
Class org.apache.tika.parser.strings.StringsParser extends AbstractParser implements Serializable
- serialVersionUID:
- 802566634661575025L
-
-
Package org.apache.tika.parser.txt
-
Class org.apache.tika.parser.txt.Icu4jEncodingDetector extends Object implements Serializable
-
Serialized Fields
-
markLimit
int markLimit
-
stripMarkup
boolean stripMarkup
-
-
-
Class org.apache.tika.parser.txt.TXTParser extends AbstractEncodingDetectorParser implements Serializable
- serialVersionUID:
- -6656102320836888910L
-
Class org.apache.tika.parser.txt.UniversalEncodingDetector extends Object implements Serializable
-
Serialized Fields
-
markLimit
int markLimit
-
-
-
-
Package org.apache.tika.parser.utils
-
Class org.apache.tika.parser.utils.DataURISchemeParseException extends TikaException implements Serializable
-
-
Package org.apache.tika.parser.video
-
Class org.apache.tika.parser.video.FLVParser extends AbstractParser implements Serializable
- serialVersionUID:
- -8718013155719197679L
-
-
Package org.apache.tika.parser.wordperfect
-
Class org.apache.tika.parser.wordperfect.QuattroProParser extends AbstractParser implements Serializable
- serialVersionUID:
- 8941810225917012232L
-
Class org.apache.tika.parser.wordperfect.WordPerfectParser extends AbstractParser implements Serializable
- serialVersionUID:
- 8941810225917012232L
-
Serialized Fields
-
includeDeletedContent
boolean includeDeletedContent
-
-
-
Package org.apache.tika.parser.xliff
-
Class org.apache.tika.parser.xliff.XLIFF12Parser extends AbstractParser implements Serializable
- serialVersionUID:
- 1490085649251663857L
-
Class org.apache.tika.parser.xliff.XLZParser extends AbstractParser implements Serializable
- serialVersionUID:
- -1877314028666058564L
-
Serialized Fields
-
xliffParser
Parser xliffParser
Shared Parser instance.
-
-
-
Package org.apache.tika.parser.xml
-
Class org.apache.tika.parser.xml.DcXMLParser extends XMLParser implements Serializable
- serialVersionUID:
- 4905318835463880819L
-
Class org.apache.tika.parser.xml.FictionBookParser extends XMLParser implements Serializable
- serialVersionUID:
- 4195954546491524374L
-
Class org.apache.tika.parser.xml.XMLParser extends AbstractParser implements Serializable
- serialVersionUID:
- -6028836725280212837L
-
-
Package org.apache.tika.sax
-
Class org.apache.tika.sax.AbstractRecursiveParserWrapperHandler extends DefaultHandler implements Serializable
-
Serialized Fields
-
contentHandlerFactory
ContentHandlerFactory contentHandlerFactory
-
embeddedDepth
int embeddedDepth
-
embeddedResources
int embeddedResources
-
maxEmbeddedResources
int maxEmbeddedResources
-
-
-
Class org.apache.tika.sax.BasicContentHandlerFactory extends Object implements Serializable
-
Serialized Fields
-
type
BasicContentHandlerFactory.HANDLER_TYPE type
-
writeLimit
int writeLimit
-
-
-
Class org.apache.tika.sax.RecursiveParserWrapperHandler extends AbstractRecursiveParserWrapperHandler implements Serializable
-
Class org.apache.tika.sax.TaggedSAXException extends SAXException implements Serializable
-
Serialized Fields
-
tag
Object tag
The object reference used to tag the exception.
-
-
-
-
Package org.apache.tika.server
-
Class org.apache.tika.server.TikaServerParseException extends javax.ws.rs.WebApplicationException implements Serializable
-
-
Package org.apache.tika.utils
-
Class org.apache.tika.utils.XMLReaderUtils extends Object implements Serializable
- serialVersionUID:
- 6110455808615143122L
-
-
Package org.apache.tika.xmp
-
Class org.apache.tika.xmp.XMPMetadata extends Metadata implements Serializable
-
Serialization Methods
-
readObject
private void readObject(ObjectInputStream ois) throws ClassNotFoundException, IOException
- Throws:
ClassNotFoundException
IOException
-
writeObject
private void writeObject(ObjectOutputStream ois) throws IOException
- Throws:
IOException
-
-
Serialized Fields
-
xmpData
com.adobe.xmp.XMPMeta xmpData
The XMP data
-
-
-