Coverage Report - org.apache.any23.extractor.html.SpanCloserInputStream
 
Classes in this File Line Coverage Branch Coverage Complexity
SpanCloserInputStream
0%
0/43
0%
0/36
5.75
 
 1  
 /*
 2  
  * Licensed to the Apache Software Foundation (ASF) under one or more
 3  
  * contributor license agreements.  See the NOTICE file distributed with
 4  
  * this work for additional information regarding copyright ownership.
 5  
  * The ASF licenses this file to You under the Apache License, Version 2.0
 6  
  * (the "License"); you may not use this file except in compliance with
 7  
  * the License.  You may obtain a copy of the License at
 8  
  *
 9  
  *  http://www.apache.org/licenses/LICENSE-2.0
 10  
  *
 11  
  * Unless required by applicable law or agreed to in writing, software
 12  
  * distributed under the License is distributed on an "AS IS" BASIS,
 13  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14  
  * See the License for the specific language governing permissions and
 15  
  * limitations under the License.
 16  
  */
 17  
 
 18  
 package org.apache.any23.extractor.html;
 19  
 
 20  
 import java.io.IOException;
 21  
 import java.io.InputStream;
 22  
 
 23  
 /**
 24  
  * Extension of {@link java.io.InputStream} meant to
 25  
  * detect and replace any occurrence of inline <i>span</i>:
 26  
  * <pre>
 27  
  * &lt;span/&gt;
 28  
  * </pre>
 29  
  * with an open close tag sequence:
 30  
  * <pre>
 31  
  * &lt;span&gt;&lt;/span&gt;
 32  
  * </pre>
 33  
  *
 34  
  * @author Michele Mostarda (mostarda@fbk.eu)
 35  
  */
 36  
 public class SpanCloserInputStream extends InputStream {
 37  
 
 38  
     private static final String TRAILING_SEQUENCE_OPEN  = "<span";
 39  
     private static final char   TRAILING_SEQUENCE_CLOSE = '>';
 40  
     private static final String CLOSE_SEQUENCE          = "</span>";
 41  
 
 42  
     private final InputStream wrapped;
 43  
 
 44  0
     private int trailingSequenceOpenMatch  = 0;
 45  0
     private int closeSequenceIndex = 0;
 46  0
     private boolean trailingSequenceOpenDetected  = false;
 47  0
     private boolean trailingSequenceCloseDetected = false;
 48  0
     private boolean inlineDetected = false;
 49  0
     private boolean betweenQuotes = false;
 50  
 
 51  0
     public SpanCloserInputStream(InputStream is) {
 52  0
         wrapped = is;
 53  0
     }
 54  
 
 55  
     @Override
 56  
     public int read() throws IOException {
 57  0
         if(trailingSequenceOpenDetected && inlineDetected && trailingSequenceCloseDetected) {
 58  0
             final int ret = CLOSE_SEQUENCE.charAt(closeSequenceIndex);
 59  0
             closeSequenceIndex++;
 60  0
             if(closeSequenceIndex >= CLOSE_SEQUENCE.length()) {
 61  0
                 resetDetector();
 62  
             }
 63  0
             return ret;
 64  0
         } else if(trailingSequenceOpenDetected && trailingSequenceCloseDetected) {
 65  0
             resetDetector();
 66  
         }
 67  
 
 68  0
         int c = wrapped.read();
 69  0
         if(c == '"') {
 70  0
             betweenQuotes = !betweenQuotes;
 71  0
         } else if(c == '/' && !betweenQuotes && trailingSequenceOpenDetected && !trailingSequenceCloseDetected) {
 72  0
             inlineDetected = true;
 73  0
             c = wrapped.read();
 74  
         }
 75  
 
 76  0
         if( !trailingSequenceOpenDetected && checkOpenTrailingSequence(c) ) {
 77  0
            trailingSequenceOpenDetected = true;
 78  0
             trailingSequenceCloseDetected = false;
 79  0
         } else if(c == TRAILING_SEQUENCE_CLOSE && trailingSequenceOpenDetected) {
 80  0
             trailingSequenceCloseDetected = true;
 81  
         }
 82  0
         return c;
 83  
     }
 84  
 
 85  
     private boolean checkOpenTrailingSequence(int c) {
 86  0
         if( TRAILING_SEQUENCE_OPEN.charAt(trailingSequenceOpenMatch) == Character.toLowerCase(c) ) {
 87  0
             trailingSequenceOpenMatch++;
 88  0
             if(trailingSequenceOpenMatch == TRAILING_SEQUENCE_OPEN.length()) {
 89  0
                 trailingSequenceOpenMatch = 0;
 90  0
                 return true;
 91  
             }
 92  
         } else {
 93  0
             trailingSequenceOpenMatch = 0;
 94  
         }
 95  0
         return false;
 96  
     }
 97  
 
 98  
     private void resetDetector() {
 99  0
         trailingSequenceOpenMatch = 0;
 100  0
         closeSequenceIndex = 0;
 101  0
         trailingSequenceOpenDetected = false;
 102  0
         trailingSequenceCloseDetected = false;
 103  0
         inlineDetected = false;
 104  0
         betweenQuotes = false;
 105  0
     }
 106  
 
 107  
 }