View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.maven.search.backend.remoterepository.extractor;
20  
21  import java.util.List;
22  
23  import org.apache.maven.search.api.Record;
24  import org.apache.maven.search.backend.remoterepository.Context;
25  import org.apache.maven.search.backend.remoterepository.RecordFactory;
26  import org.apache.maven.search.backend.remoterepository.ResponseExtractor;
27  import org.jsoup.nodes.Document;
28  import org.jsoup.nodes.Element;
29  
30  /**
31   * A support class for extractors.
32   */
33  public abstract class ResponseExtractorSupport implements ResponseExtractor {
34      protected boolean isChecksum(String name) {
35          return name.endsWith(".sha1") || name.endsWith(".md5") || name.endsWith(".sha256") || name.endsWith(".sha512");
36      }
37  
38      protected boolean isSignature(String name) {
39          return name.endsWith(".asc") || name.endsWith(".sigstore");
40      }
41  
42      protected boolean isMetadata(String name) {
43          return name.equals("maven-metadata.xml");
44      }
45  
46      /**
47       * Returns {@code true} if the name is not empty, not directory special (".."), is not metadata
48       * is not signature and is not checksum. Hence, it should be a name of interest.
49       */
50      protected boolean accept(String name) {
51          return name != null
52                  && !name.isEmpty()
53                  && !name.contains("..")
54                  && !isMetadata(name)
55                  && !isSignature(name)
56                  && !isChecksum(name);
57      }
58  
59      /**
60       * This is Maven metadata parsing, is NOT remote end specific, unlike HTML parsing.
61       */
62      @Override
63      public int populateGA(Context context, Document document, RecordFactory recordFactory, List<Record> page) {
64          // Maven Metadata XML like this one:
65          // https://repo.maven.apache.org/maven2/org/apache/maven/indexer/search-api/maven-metadata.xml
66          Element metadata = document.getElementsByTag("metadata").first();
67          if (metadata != null) {
68              Element versioning = metadata.getElementsByTag("versioning").first();
69              if (versioning != null) {
70                  Element versions = versioning.getElementsByTag("versions").first();
71                  if (versions != null) {
72                      for (Element version : versions.getElementsByTag("version")) {
73                          page.add(recordFactory.create(
74                                  context.getGroupId(), context.getArtifactId(), version.text(), null, null, null));
75                      }
76                  }
77              }
78          }
79          return page.size();
80      }
81  
82      /**
83       * Processes extracted "name" extracted by {@link #populateGAV(Context, Document, RecordFactory, List)} method.
84       */
85      protected void populateGAVName(Context context, String name, RecordFactory recordFactory, List<Record> page) {
86          if (accept(name)) {
87              if (name.startsWith(context.getArtifactId())) {
88                  name = name.substring(context.getArtifactId().length() + 1);
89                  if (name.startsWith(context.getVersion())) {
90                      name = name.substring(context.getVersion().length() + 1);
91                      String ext = null;
92                      String classifier = null;
93                      if (name.contains(".")) {
94                          while (name.contains(".")) {
95                              if (ext == null) {
96                                  ext = name.substring(name.lastIndexOf('.') + 1);
97                              } else {
98                                  ext = name.substring(name.lastIndexOf('.') + 1) + "." + ext;
99                              }
100                             name = name.substring(0, name.lastIndexOf('.'));
101                         }
102                         classifier = name.isEmpty() ? null : name;
103                     } else {
104                         ext = name;
105                     }
106                     page.add(recordFactory.create(
107                             context.getGroupId(),
108                             context.getArtifactId(),
109                             context.getVersion(),
110                             classifier,
111                             ext,
112                             null));
113                 }
114             }
115         }
116     }
117 }