Coverage Report - org.apache.any23.extractor.html.HCardName
 
Classes in this File Line Coverage Branch Coverage Complexity
HCardName
0%
0/92
0%
0/74
3.455
HCardName$FieldValue
0%
0/14
0%
0/12
3.455
 
 1  
 /*
 2  
  * Licensed to the Apache Software Foundation (ASF) under one or more
 3  
  * contributor license agreements.  See the NOTICE file distributed with
 4  
  * this work for additional information regarding copyright ownership.
 5  
  * The ASF licenses this file to You under the Apache License, Version 2.0
 6  
  * (the "License"); you may not use this file except in compliance with
 7  
  * the License.  You may obtain a copy of the License at
 8  
  *
 9  
  *  http://www.apache.org/licenses/LICENSE-2.0
 10  
  *
 11  
  * Unless required by applicable law or agreed to in writing, software
 12  
  * distributed under the License is distributed on an "AS IS" BASIS,
 13  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14  
  * See the License for the specific language governing permissions and
 15  
  * limitations under the License.
 16  
  */
 17  
 
 18  
 package org.apache.any23.extractor.html;
 19  
 
 20  
 import org.w3c.dom.Node;
 21  
 
 22  
 import java.util.ArrayList;
 23  
 import java.util.Arrays;
 24  
 import java.util.Collection;
 25  
 import java.util.Collections;
 26  
 import java.util.HashMap;
 27  
 import java.util.List;
 28  
 import java.util.Map;
 29  
 
 30  
 import static org.apache.any23.extractor.html.HTMLDocument.TextField;
 31  
 
 32  
 /**
 33  
  * An HCard name, consisting of various parts. Handles computation
 34  
  * of full names from first and last names, and similar computations.
 35  
  *
 36  
  * @author Richard Cyganiak (richard@cyganiak.de)
 37  
  */
 38  0
 public class HCardName {
 39  
 
 40  
     public static final String GIVEN_NAME = "given-name";
 41  
     public static final String FAMILY_NAME = "family-name";
 42  
     public static final String ADDITIONAL_NAME = "additional-name";
 43  
     public static final String NICKNAME = "nickname";
 44  
     public static final String HONORIFIC_PREFIX = "honorific-prefix";
 45  
     public static final String HONORIFIC_SUFFIX = "honorific-suffix";
 46  
 
 47  0
     public static final String[] FIELDS = {
 48  
             GIVEN_NAME,
 49  
             FAMILY_NAME,
 50  
             ADDITIONAL_NAME,
 51  
             NICKNAME,
 52  
             HONORIFIC_PREFIX,
 53  
             HONORIFIC_SUFFIX
 54  
     };
 55  
 
 56  0
     private static final String[] NAME_COMPONENTS = {
 57  
             HONORIFIC_PREFIX,
 58  
             GIVEN_NAME,
 59  
             ADDITIONAL_NAME,
 60  
             FAMILY_NAME,
 61  
             HONORIFIC_SUFFIX
 62  
     };
 63  
 
 64  0
     private Map<String, FieldValue> fields = new HashMap<String, FieldValue>();
 65  0
     private TextField[] fullName   = null;
 66  0
     private TextField organization = null;
 67  0
     private TextField unit         = null;
 68  
 
 69  
     private static TextField join(TextField[] sarray, String delimiter) {
 70  0
         StringBuilder builder = new StringBuilder();
 71  0
         final int sarrayLengthMin2 =  sarray.length - 1;
 72  0
         for(int i = 0; i < sarray.length; i++) {
 73  0
             builder.append(sarray[i].value());
 74  0
             if( i < sarrayLengthMin2) {
 75  0
                 builder.append(delimiter);
 76  
             }
 77  
         }
 78  0
         return new TextField( builder.toString(), sarray[0].source() ) ;
 79  
     }
 80  
 
 81  
     /**
 82  
      * Resets the content of the HName fields.
 83  
      */
 84  
     public void reset() {
 85  0
         fields.clear();
 86  0
         fullName = null;
 87  0
         organization = null;
 88  0
         unit = null;
 89  0
     }
 90  
 
 91  
     public void setField(String fieldName, TextField nd) {
 92  0
         final String value = fixWhiteSpace( nd.value() );
 93  0
         if (value == null) return;
 94  0
         FieldValue fieldValue = fields.get(fieldName);
 95  0
         if(fieldValue == null) {
 96  0
             fieldValue = new FieldValue();
 97  0
             fields.put(fieldName, fieldValue);
 98  
         }
 99  0
         fieldValue.addValue( new TextField(value, nd.source()) );
 100  0
     }
 101  
 
 102  
     public void setFullName(TextField nd) {
 103  0
         final String value = fixWhiteSpace( nd.value() );
 104  0
         if (value == null) return;
 105  0
         String[] split = value.split("\\s+");
 106  
         // Supporting case: ['King,',  'Ryan'] that is converted to ['Ryan', 'King'] .
 107  0
         final String split0 = split[0];
 108  0
         final int split0Length = split0.length();
 109  0
         if(split.length > 1 && split0.charAt(split0Length -1) == ',') {
 110  0
             String swap = split[1];
 111  0
             split[1] = split0.substring(0, split0Length -1);
 112  0
             split[0] = swap;
 113  
         }
 114  0
         TextField[] splitFields = new TextField[split.length];
 115  0
         for(int i = 0; i < split.length; i++) {
 116  0
             splitFields[i] = new TextField(split[i], nd.source());
 117  
         }
 118  0
         this.fullName = splitFields;
 119  0
     }
 120  
 
 121  
     public void setOrganization(TextField nd) {
 122  0
         final String value = fixWhiteSpace( nd.value() );
 123  0
         if (value == null) return;
 124  0
         this.organization = new TextField(value, nd.source());
 125  0
     }
 126  
 
 127  
     public boolean isMultiField(String fieldName) {
 128  0
         FieldValue fieldValue = fields.get(fieldName);
 129  0
         return fieldValue != null && fieldValue.isMultiField();
 130  
     }
 131  
 
 132  
     public boolean containsField(String fieldName) {
 133  0
         return GIVEN_NAME.equals(fieldName) || FAMILY_NAME.equals(fieldName) || fields.containsKey(fieldName);
 134  
     }
 135  
 
 136  
     public TextField getField(String fieldName) {
 137  0
         if (GIVEN_NAME.equals(fieldName)) {
 138  0
             return getFullNamePart(GIVEN_NAME, 0);
 139  
         }
 140  0
         if (FAMILY_NAME.equals(fieldName)) {
 141  0
             return getFullNamePart(FAMILY_NAME, Integer.MAX_VALUE);
 142  
         }
 143  0
         FieldValue v = fields.get(fieldName);
 144  0
         return v == null ? null : v.getValue();
 145  
     }
 146  
 
 147  
     public Collection<TextField> getFields(String fieldName) {
 148  0
         FieldValue v = fields.get(fieldName);
 149  0
         return v == null ? Collections.<TextField>emptyList() : v.getValues();
 150  
     }
 151  
 
 152  
     private TextField getFullNamePart(String fieldName, int index) {
 153  0
         if (fields.containsKey(fieldName)) {
 154  0
             return fields.get(fieldName).getValue();
 155  
         }
 156  0
         if (fullName == null) return null;
 157  
         // If org and fn are the same, the hCard is for an organization, and we do not split the fn
 158  0
         if (organization != null && fullName[0].value().equals(organization.value())) {
 159  0
             return null;
 160  
         }
 161  0
         if (index != Integer.MAX_VALUE && fullName.length <= index) return null;
 162  0
         return fullName[ index == Integer.MAX_VALUE ? fullName.length - 1 : index];
 163  
     }
 164  
 
 165  
     public boolean hasField(String fieldName) {
 166  0
         return getField(fieldName) != null;
 167  
     }
 168  
 
 169  
     public boolean hasAnyField() {
 170  0
         for (String fieldName : FIELDS) {
 171  0
             if (hasField(fieldName)) return true;
 172  
         }
 173  0
         return false;
 174  
     }
 175  
 
 176  
     public TextField getFullName() {
 177  0
         if (fullName != null) return join(fullName, " ");
 178  0
         StringBuffer s = new StringBuffer();
 179  0
         boolean empty = true;
 180  0
         Node first = null;
 181  
         TextField current;
 182  0
         for (String fieldName : NAME_COMPONENTS) {
 183  0
             if (!hasField(fieldName)) continue;
 184  0
             if (!empty) {
 185  0
                 s.append(' ');
 186  
             }
 187  0
             current = getField(fieldName);
 188  0
             if(first == null) { first = current.source(); }
 189  0
             s.append( current.value() );
 190  0
             empty = false;
 191  
         }
 192  0
         if (empty) return null;
 193  0
         return new TextField( s.toString(), first);
 194  
     }
 195  
 
 196  
     public TextField getOrganization() {
 197  0
         return organization;
 198  
     }
 199  
 
 200  
     public void setOrganizationUnit(TextField nd) {
 201  0
         final String value = fixWhiteSpace( nd.value() );
 202  0
         if (value == null) return;
 203  0
         this.unit = new TextField(value, nd.source() );
 204  0
     }
 205  
 
 206  
     public TextField getOrganizationUnit() {
 207  0
         return unit;
 208  
     }
 209  
 
 210  
     private String fixWhiteSpace(String s) {
 211  0
         if (s == null) return null;
 212  0
         s = s.trim().replaceAll("\\s+", " ");
 213  0
         if ("".equals(s)) return null;
 214  0
         return s;
 215  
     }
 216  
 
 217  
     /**
 218  
      * Represents a possible field value.
 219  
      */
 220  0
     private class FieldValue {
 221  
 
 222  
         private TextField value;
 223  0
         private List<TextField> multiValue = new ArrayList<TextField>();
 224  
 
 225  0
         FieldValue() {}
 226  
 
 227  
         void addValue(TextField v) {
 228  0
             if(value == null && multiValue == null) {
 229  0
                 value = v;
 230  0
             } else if(multiValue == null) {
 231  0
                 multiValue = new ArrayList<TextField>();
 232  0
                 multiValue.add(value);
 233  0
                 value = null;
 234  0
                 multiValue.add(v);
 235  
             } else {
 236  0
                 multiValue.add(v);
 237  
             }
 238  0
         }
 239  
 
 240  
         boolean isMultiField() {
 241  0
             return value == null;
 242  
         }
 243  
 
 244  
         TextField getValue() {
 245  0
             return value != null ? value : multiValue.get(0);
 246  
         }
 247  
 
 248  
         Collection<TextField> getValues() {
 249  0
             return value != null ? Arrays.asList(value) : multiValue;
 250  
         }
 251  
     }
 252  
     
 253  
 }