# ************************************************************* # # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. # # ************************************************************* All paragraphs in tables: sprms: 0x2416 (sprmPFInTable) indicates a paragraph is in a table 0x6649 (sprmPTableDepth) denotes the nesting depth of the paragraph paragraphs at nesting depth 1: end of cell: 0x7 end of row: 0x7 + sprm 0x2417(sprmFTtp) the end of a row has its own 0x7 paragraphs at nesting depth > 1: end of cell: 0xd + sprm 0x244b(sprmPCell) end of row: 0xd + sprm 0x244b(sprmPCell) + sprm 0x244c(sprmPRow) the end of a row has its own 0xd Algorithm to detect table structure: Data structures: RowData: int getCellCount() // return number of cells in row Handle getStart(i) // get handle for start of cell i Handle getEnd(i) // get handle for end of cell i Properties getProperties() // return properties of row TableData: void addCell(Handle start, Handle end) // insert cell starting at start and ending at end into the // current row void endRow(properties) // end current row and save properties for that row, begin new row int getRowCount() // return number of rows in table RowData getRow(i) // get data for row i prevTableDepth depth in table hierarchy of previous 
paragraph curTableDepth depth in table hierarchy of current paragraph bInCell true if current paragraph is in a cell bCellEnd true if current paragraph is the last paragraph of a cell bRowEnd true if current paragraph is the end of a row paragraphHandle handle for current paragraph initial: create stack of TableData final: handle remaining TableData on stack creating StreamHandler: push new TableData on stack destroying StreamHandler: handle TableData on top of stack pop TableData from stack StreamHandler::substream: push new TableData on stack handle TableData on top of stack pop TableData from stack starting paragraph group: paragraphHandle = currentHandle; bInCell = false; bCellEnd = false; bRowEnd = false; ending paragraph group: difference = curTableDepth - prevTableDepth if (difference > 0) push difference new TableData onto stack else if (difference < 0) { repeat -difference times { handle top of stack pop stack } } prevTableDepth = curTableDepth if (bInCell) { if (handleStart is null) handleStart = paragraphHandle; if (bCellEnd) { stack.top().addCell(handleStart, paragraphHandle); clear handleStart } if (bRowEnd) { stack.top().endRow(properties) } } in StreamHandler::props: save properties PropertiesHandler::sprm: sprm 0x6649: save value in curTableDepth sprm 0x2416: bInCell = true sprm 0x244b: bCellEnd = true sprm 0x2417: bRowEnd = true text: 0x7: bCellEnd = true