View Javadoc
1   /*
2    * ====================================================================
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *   http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing,
14   * software distributed under the License is distributed on an
15   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16   * KIND, either express or implied.  See the License for the
17   * specific language governing permissions and limitations
18   * under the License.
19   * ====================================================================
20   *
21   * This software consists of voluntary contributions made by many
22   * individuals on behalf of the Apache Software Foundation.  For more
23   * information on the Apache Software Foundation, please see
24   * <http://www.apache.org/>.
25   *
26   */
27  package org.apache.hc.client5.http.psl;
28  
29  import java.io.BufferedReader;
30  import java.io.IOException;
31  import java.io.Reader;
32  import java.util.ArrayList;
33  import java.util.List;
34  
35  import org.apache.hc.core5.annotation.Contract;
36  import org.apache.hc.core5.annotation.ThreadingBehavior;
37  
38  /**
39   * Parses the list from <a href="http://publicsuffix.org/">publicsuffix.org</a>
40   * and configures a PublicSuffixFilter.
41   *
42   * @since 4.4
43   */
44  @Contract(threading = ThreadingBehavior.STATELESS)
45  public final class PublicSuffixListParser {
46  
47      /**
48       * Singleton instance.
49       *
50       * @since 5.2
51       */
52      public static final PublicSuffixListParser INSTANCE = new PublicSuffixListParser();
53  
54      public PublicSuffixListParser() {
55      }
56  
57      /**
58       * Parses the public suffix list format.
59       * <p>
60       * When creating the reader from the file, make sure to use the correct encoding
61       * (the original list is in UTF-8).
62       *
63       * @param reader the data reader. The caller is responsible for closing the reader.
64       * @throws java.io.IOException on error while reading from list
65       */
66      public PublicSuffixList parse(final Reader reader) throws IOException {
67          final List<String> rules = new ArrayList<>();
68          final List<String> exceptions = new ArrayList<>();
69          final BufferedReader r = new BufferedReader(reader);
70  
71          String line;
72          while ((line = r.readLine()) != null) {
73              if (line.isEmpty()) {
74                  continue;
75              }
76              if (line.startsWith("//")) {
77                  continue; //entire lines can also be commented using //
78              }
79              if (line.startsWith(".")) {
80                  line = line.substring(1); // A leading dot is optional
81              }
82              // An exclamation mark (!) at the start of a rule marks an exception to a previous wildcard rule
83              final boolean isException = line.startsWith("!");
84              if (isException) {
85                  line = line.substring(1);
86              }
87  
88              if (isException) {
89                  exceptions.add(line);
90              } else {
91                  rules.add(line);
92              }
93          }
94          return new PublicSuffixList(DomainType.UNKNOWN, rules, exceptions);
95      }
96  
97      /**
98       * Parses the public suffix list format by domain type (currently supported ICANN and PRIVATE).
99       * <p>
100      * When creating the reader from the file, make sure to use the correct encoding
101      * (the original list is in UTF-8).
102      *
103      * @param reader the data reader. The caller is responsible for closing the reader.
104      * @throws java.io.IOException on error while reading from list
105      *
106      * @since 4.5
107      */
108     public List<PublicSuffixList> parseByType(final Reader reader) throws IOException {
109         final List<PublicSuffixList> result = new ArrayList<>(2);
110 
111         final BufferedReader r = new BufferedReader(reader);
112 
113         DomainType domainType = null;
114         List<String> rules = null;
115         List<String> exceptions = null;
116         String line;
117         while ((line = r.readLine()) != null) {
118             if (line.isEmpty()) {
119                 continue;
120             }
121             if (line.startsWith("//")) {
122 
123                 if (domainType == null) {
124                     if (line.contains("===BEGIN ICANN DOMAINS===")) {
125                         domainType = DomainType.ICANN;
126                     } else if (line.contains("===BEGIN PRIVATE DOMAINS===")) {
127                         domainType = DomainType.PRIVATE;
128                     }
129                 } else {
130                     if (line.contains("===END ICANN DOMAINS===") || line.contains("===END PRIVATE DOMAINS===")) {
131                         if (rules != null) {
132                             result.add(new PublicSuffixList(domainType, rules, exceptions));
133                         }
134                         domainType = null;
135                         rules = null;
136                         exceptions = null;
137                     }
138                 }
139 
140                 continue; //entire lines can also be commented using //
141             }
142             if (domainType == null) {
143                 continue;
144             }
145 
146             if (line.startsWith(".")) {
147                 line = line.substring(1); // A leading dot is optional
148             }
149             // An exclamation mark (!) at the start of a rule marks an exception to a previous wildcard rule
150             final boolean isException = line.startsWith("!");
151             if (isException) {
152                 line = line.substring(1);
153             }
154 
155             if (isException) {
156                 if (exceptions == null) {
157                     exceptions = new ArrayList<>();
158                 }
159                 exceptions.add(line);
160             } else {
161                 if (rules == null) {
162                     rules = new ArrayList<>();
163                 }
164                 rules.add(line);
165             }
166         }
167         return result;
168     }
169 
170 }