View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.maven.search.backend.remoterepository.extractor;
20  
21  import java.util.List;
22  
23  import org.apache.maven.search.api.Record;
24  import org.apache.maven.search.backend.remoterepository.Context;
25  import org.apache.maven.search.backend.remoterepository.RecordFactory;
26  import org.jsoup.nodes.Document;
27  import org.jsoup.nodes.Element;
28  
29  /**
30   * Extractor for Maven Central.
31   */
32  public class MavenCentralResponseExtractor extends ResponseExtractorSupport {
33      /**
34       * Extracts the "name" from {@code href} attribute. In case of Maven Central, the href
35       * attribute contains name in relative form as {@code "name/"} (followed by slash), if name denotes
36       * a directory. The trailing slash is removed by this method, if any.
37       */
38      private String nameInHref(Element element) {
39          String name = element.attr("href");
40          if (name.endsWith("/")) {
41              name = name.substring(0, name.length() - 1);
42          }
43          return name;
44      }
45  
46      @Override
47      public int populateG(Context context, Document document, RecordFactory recordFactory, List<Record> page) {
48          // Index HTML page like this one:
49          // https://repo.maven.apache.org/maven2/org/apache/maven/indexer/
50          Element contents = document.getElementById("contents");
51          if (contents != null) {
52              for (Element element : contents.getElementsByTag("a")) {
53                  String name = nameInHref(element);
54                  if (accept(name)) {
55                      page.add(recordFactory.create(context.getGroupId(), name, null, null, null, null));
56                  }
57              }
58          }
59          return page.size();
60      }
61  
62      @Override
63      public int populateGAV(Context context, Document document, RecordFactory recordFactory, List<Record> page) {
64          // Index HTML page like this one:
65          // https://repo.maven.apache.org/maven2/org/apache/maven/indexer/search-api/7.0.3/
66          Element contents = document.getElementById("contents");
67          if (contents != null) {
68              for (Element element : contents.getElementsByTag("a")) {
69                  // skip possible subdirectories and files without extensions
70                  String name = element.attr("href");
71                  if (name.endsWith("/") || !name.contains(".")) {
72                      continue;
73                  }
74                  populateGAVName(context, nameInHref(element), recordFactory, page);
75              }
76          }
77          return page.size();
78      }
79  }