MediaTypeRegistry registry
List<E> detectors
MediaType type
MagicDetector.detect(InputStream, Metadata)
method if a match is found.
int length
byte[] pattern
MagicDetector.type
is returned.
int patternLength
boolean isRegex
boolean isStringIgnoreCase
byte[] mask
int offsetRangeBegin
int offsetRangeEnd
first offset.
Note that this is not the offset of the last byte read from the document stream. Instead, the last window of bytes to be compared starts at this offset.
Charset charset
int bytesToTest
int markLimit
int markLimit
TemporaryResources tmp
Set<E> supportedEmbedTypes
Map<K,V> metadataCommandArguments
String[] command
Runtime.exec(String[])
String commandAssignmentOperator
String commandAssignmentDelimeter
String commandAppendOperator
boolean quoteAssignmentValues
ExtractReaderException.TYPE type
String[] charsetsToTry
int index
ContentHandler handler
int writeLimit
ParseContext context
EmbeddedDocumentExtractor embeddedDocumentExtractor
TikaConfig tikaConfig
MimeTypes mimeTypes
Detector detector
ClassLoader loader
Parser parser
Path tikaBin
ParserFactoryFactory parserFactoryFactory
Queue<E> pool
List<E> java
int poolSize
int currentlyInUse
long serverPulseMillis
long serverParseTimeoutMillis
long serverWaitTimeoutMillis
int maxFilesProcessedPerClient
ParseContext context
Parser parser
org.apache.tika.gui.TikaGUI.ImageSavingParser imageParser
CardLayout layout
JPanel cards
JEditorPane html
JEditorPane text
JEditorPane textMain
JEditorPane xml
JEditorPane json
JEditorPane metadata
JFileChooser chooser
String string
int slash
MediaType.string.
int semicolon
MediaType.string
from possible parameters. Length of MediaType.string
in case there are no parameters.
Map<K,V> parameters
Map<K,V> registry
Map<K,V> inheritance
MediaType type
int minLength
String acronym
String uti
List<E> links
String description
List<E> magics
List<E> rootXML
List<E> extensions
boolean isInterpreted
MimeType rootMimeType
List<E> rootMimeTypeL
MimeType textMimeType
MimeType htmlMimeType
MimeType xmlMimeType
MediaTypeRegistry registry
Map<K,V> types
org.apache.tika.mime.Patterns patterns
List<E> magics
List<E> xmls
MimeTypes mimeTypes
MediaType rootMediaType
float changeRate
float priorMagicFileType
float priorExtensionFileType
float priorMetaFileType
float magic_trust
float extension_trust
float meta_trust
float magic_neg
float extension_neg
float meta_neg
float threshold
EncodingDetector encodingDetector
Detector detector
MediaTypeRegistry registry
List<E> parsers
Parser fallback
DigestingParser.Digester digester
Parser parser
boolean catchEmbeddedExceptions
char[] delimiters
int markLimit
double minConfidence
String aeDescriptorPath
String UMLSUser
String UMLSPass
boolean prettyPrint
CTAKESSerializer serializerType
OutputStream stream
boolean serialize
boolean text
String[] metadata
CTAKESAnnotationProperty[] annotationProps
char separatorChar
long timeoutMs
Set<E> supportedTypes
Map<K,V> metadataPatterns
String[] command
Runtime.exec(String[])
ExternalParser.LineConsumer ignoredLineConsumer
String command
GeoParserConfig defaultConfig
GeoGazetteerClient gazetteerClient
boolean initialized
URL modelUrl
opennlp.tools.namefind.NameFinderME nameFinder
boolean available
int markLimit
boolean extractScripts
int markLimit
HwpTextExtractorV5 extractor
int maxDataLengthBytes
int FMT_ANPA_1312
int FMT_ANPA_UPI
int FMT_ANPA_UPI_DL
int FMT_IPTC_7901
int FMT_IPTC_PHOTO
int FMT_IPTC_CHAR
int FMT_NITF
int FMT_NITF_TT
int FMT_NITF_RB
int FMT_IPTC_AP
int FMT_IPTC_BLM
int FMT_IPTC_NYT
int FMT_IPTC_RTR
int FORMAT
Connection connection
Detector detector
boolean extractAllAlternatives
OfficeParserConfig defaultOfficeParserConfig
Locale locale
boolean extractMacros
boolean includeDeletedContent
boolean includeMoveFromContent
boolean includeShapeBasedContent
boolean includeHeadersAndFooters
boolean includeMissingRows
boolean includeSlideNotes
boolean includeSlideMasterContent
boolean concatenatePhoneticRuns
boolean useSAXDocxExtractor
boolean useSAXPptxExtractor
boolean extractAllAlternativesFromMSG
String dateOverrideFormat
DecimalFormatSymbols decimalSymbols
DecimalFormat integerFormat
DecimalFormat decimalFormat
DecimalFormat scientificFormat
byte[] signature
int version
int header_len
int unknown_000c
long last_modified
long lang_id
byte[] dir_uuid
byte[] stream_uuid
long unknown_offset
long unknown_len
long dir_offset
long dir_len
long data_offset
int dataRemained
int currentPlace
byte[] signature
int version
int header_len
int unknown_000c
long block_len
int blockidx_intvl
int index_depth
int index_root
int index_head
int unknown_0024
long num_blocks
int unknown_002c
long lang_id
byte[] system_uuid
byte[] unknown_0044
int dataRemained
int currentPlace
long size
byte[] signature
long version
long resetInterval
long windowSize
long windowsPerReset
long unknown_18
int dataRemained
int currentPlace
long version
long block_count
long unknown
long table_offset
long uncompressed_len
long compressed_len
long block_len
long[] block_address
int dataRemained
int currentPlace
byte[] signature
long free_space
int dataRemained
int currentPlace
byte[] signature
long free_space
long unknown_0008
int block_prev
int block_next
int dataRemained
int currentPlace
int memoryLimitInKb
boolean ignoreListMarkup
AbstractMultipleParser.MetadataPolicy policy
Collection<E> parsers
Set<E> offeredTypes
ParserDecorator
MediaTypeRegistry registry
String language
String pageSegMode
long minFileSizeToOcr
long maxFileSizeToOcr
int timeoutSeconds
TesseractOCRConfig.OUTPUT_TYPE outputType
boolean enableImagePreprocessing
int density
int depth
String colorspace
String filter
int resize
String pageSeparator
boolean preserveInterwordSpacing
boolean applyRotation
boolean skipOcr
Map<K,V> otherTesseractConfig
Set<E> userConfigured
Set<E> langs
TesseractOCRConfig defaultConfig
String tesseractPath
String tessdataPath
String imageMagickPath
boolean preloadLangs
boolean hasTesseract
boolean hasImageMagick
org.apache.tika.parser.ocr.ImagePreprocessor imagePreprocessor
boolean extractMacros
boolean needToCheck
boolean allowAccessibility
PDFParserConfig defaultConfig
Set<E> userConfigured
boolean enableAutoSpace
boolean suppressDuplicateOverlappingText
boolean extractAnnotationText
boolean sortByPosition
boolean extractAcroFormContent
boolean extractBookmarksText
boolean extractInlineImages
boolean extractInlineImageMetadataOnly
boolean extractUniqueInlineImagesOnly
boolean extractMarkedContent
Float averageCharTolerance
Float spacingTolerance
float dropThreshold
boolean ifXFAExtractOnlyXFA
PDFParserConfig.OCR_STRATEGY ocrStrategy
PDFParserConfig.OCR_RENDERING_STRATEGY ocrRenderingStrategy
int ocrDPI
org.apache.pdfbox.rendering.ImageType ocrImageType
String ocrImageFormatName
float ocrImageQuality
AccessChecker accessChecker
boolean catchIntermediateIOExceptions
boolean extractActions
boolean extractFontNames
long maxMainMemoryBytes
boolean setKCMS
boolean detectAngles
int memoryLimitInKb
boolean detectCharsetsInEntryNames
Tika secondaryParser
AgeRecogniserConfig config
ObjectRecogniser recogniser
opennlp.tools.sentiment.SentimentME classifier
String modelPath
The path could be one of the following:
int minSize
byte[] output
byte[] input
int tmpPos
int outPos
int inSize
int inPos
XHTMLContentHandler xhtml
String stringsPath
int minLength
StringsEncoding encoding
int timeoutSeconds
StringsConfig defaultStringsConfig
String filePath
FileCommandDetector fileCommandDetector
boolean stringsPresent
boolean hasEncodingOption
String stringsPath
com.amazonaws.services.transcribe.AmazonTranscribeAsync amazonTranscribeAsync
com.amazonaws.services.s3.AmazonS3 amazonS3
String bucketName
String region
boolean isAvailable
String clientId
String clientSecret
com.amazonaws.auth.AWSStaticCredentialsProvider credsProvider
boolean stripMarkup
int markLimit
int markLimit
boolean includeDeletedContent
Parser xliffParser
String id
FetchKey fetchKey
EmitKey emitKey
Metadata metadata
FetchEmitTuple.ON_PARSE_EXCEPTION onParseException
HandlerConfig handlerConfig
BasicContentHandlerFactory.HANDLER_TYPE type
int writeLimit
int maxEmbeddedResources
int sizeOffered
int queueSize
ContentHandlerFactory contentHandlerFactory
int maxEmbeddedResources
int embeddedResources
int embeddedDepth
BasicContentHandlerFactory.HANDLER_TYPE type
int writeLimit
List<E> metadataList
MetadataFilter metadataFilter
Object tag
private void readObject(ObjectInputStream ois) throws ClassNotFoundException, IOException
ClassNotFoundException
IOException
private void writeObject(ObjectOutputStream ois) throws IOException
IOException
com.adobe.internal.xmp.XMPMeta xmpData
Copyright © 2007–2021 The Apache Software Foundation. All rights reserved.