001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.net.nntp;
019
020import java.util.Arrays;
021import java.util.HashMap;
022import java.util.List;
023import java.util.Map;
024
025/**
026 * This is an implementation of a message threading algorithm, as originally devised by Zamie Zawinski.
027 * See <a href="http://www.jwz.org/doc/threading.html">http://www.jwz.org/doc/threading.html</a> for details.
028 * For his Java implementation, see
029 * <a href="https://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java">
030 * https://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java</a>
031 */
032public class Threader {
033
034    /**
035     *
036     * @param threadable
037     * @param idTable
038     */
039    private void buildContainer(final Threadable threadable, final HashMap<String, NntpThreadContainer> idTable) {
040        String id = threadable.messageThreadId();
041        NntpThreadContainer container = idTable.get(id);
042        int bogusIdCount = 0;
043
044        // A NntpThreadContainer exists for this id already. This should be a forward reference, but may
045        // be a duplicate id, in which case we will need to generate a bogus placeholder id
046        if (container != null) {
047            if (container.threadable != null) { // oops! duplicate ids...
048                bogusIdCount++; // Avoid dead local store warning
049                id = "<Bogus-id:" + bogusIdCount + ">";
050                container = null;
051            } else {
052                // The container just contained a forward reference to this message, so let's
053                // fill in the threadable field of the container with this message
054                container.threadable = threadable;
055            }
056        }
057
058        // No container exists for that message Id. Create one and insert it into the hash table.
059        if (container == null) {
060            container = new NntpThreadContainer();
061            container.threadable = threadable;
062            idTable.put(id, container);
063        }
064
065        // Iterate through all the references and create ThreadContainers for any references that
066        // don't have them.
067        NntpThreadContainer parentRef = null;
068        {
069            final String[] references = threadable.messageThreadReferences();
070            for (final String refString : references) {
071                NntpThreadContainer ref = idTable.get(refString);
072
073                // if this id doesn't have a container, create one
074                if (ref == null) {
075                    ref = new NntpThreadContainer();
076                    idTable.put(refString, ref);
077                }
078
079                // Link references together in the order they appear in the References: header,
080                // IF they don't have a parent already &&
081                // IF it will not cause a circular reference
082                if (parentRef != null && ref.parent == null && parentRef != ref && !ref.findChild(parentRef)) {
083                    // Link ref into the parent's child list
084                    ref.parent = parentRef;
085                    ref.next = parentRef.child;
086                    parentRef.child = ref;
087                }
088                parentRef = ref;
089            }
090        }
091
092        // parentRef is now set to the container of the last element in the references field. make that
093        // be the parent of this container, unless doing so causes a circular reference
094        if (parentRef != null && (parentRef == container || container.findChild(parentRef))) {
095            parentRef = null;
096        }
097
098        // if it has a parent already, it's because we saw this message in a References: field, and presumed
099        // a parent based on the other entries in that field. Now that we have the actual message, we can
100        // throw away the old parent and use this new one
101        if (container.parent != null) {
102            NntpThreadContainer rest, prev;
103
104            for (prev = null, rest = container.parent.child; rest != null; prev = rest, rest = rest.next) {
105                if (rest == container) {
106                    break;
107                }
108            }
109
110            if (rest == null) {
111                throw new IllegalStateException("Didnt find " + container + " in parent " + container.parent);
112            }
113
114            // Unlink this container from the parent's child list
115            if (prev == null) {
116                container.parent.child = container.next;
117            } else {
118                prev.next = container.next;
119            }
120
121            container.next = null;
122            container.parent = null;
123        }
124
125        // If we have a parent, link container into the parents child list
126        if (parentRef != null) {
127            container.parent = parentRef;
128            container.next = parentRef.child;
129            parentRef.child = container;
130        }
131    }
132
133    /**
134     * Find the root set of all existing ThreadContainers
135     *
136     * @param idTable
137     * @return root the NntpThreadContainer representing the root node
138     */
139    private NntpThreadContainer findRootSet(final HashMap<String, NntpThreadContainer> idTable) {
140        final NntpThreadContainer root = new NntpThreadContainer();
141        for (final Map.Entry<String, NntpThreadContainer> entry : idTable.entrySet()) {
142            final NntpThreadContainer c = entry.getValue();
143            if (c.parent == null) {
144                if (c.next != null) {
145                    throw new IllegalStateException("c.next is " + c.next.toString());
146                }
147                c.next = root.child;
148                root.child = c;
149            }
150        }
151        return root;
152    }
153
154    /**
155     * If any two members of the root set have the same subject, merge them. This is to attempt to accomodate messages without References: headers.
156     *
157     * @param root
158     */
159    private void gatherSubjects(final NntpThreadContainer root) {
160
161        int count = 0;
162
163        for (NntpThreadContainer c = root.child; c != null; c = c.next) {
164            count++;
165        }
166
167        // TODO verify this will avoid rehashing
168        HashMap<String, NntpThreadContainer> subjectTable = new HashMap<>((int) (count * 1.2), (float) 0.9);
169        count = 0;
170
171        for (NntpThreadContainer c = root.child; c != null; c = c.next) {
172            Threadable threadable = c.threadable;
173
174            // No threadable? If so, it is a dummy node in the root set.
175            // Only root set members may be dummies, and they always have at least 2 kids
176            // Take the first kid as representative of the subject
177            if (threadable == null) {
178                threadable = c.child.threadable;
179            }
180
181            final String subj = threadable.simplifiedSubject();
182
183            if (subj == null || subj.isEmpty()) {
184                continue;
185            }
186
187            final NntpThreadContainer old = subjectTable.get(subj);
188
189            // Add this container to the table iff:
190            // - There exists no container with this subject
191            // - or this is a dummy container and the old one is not - the dummy one is
192            // more interesting as a root, so put it in the table instead
193            // - The container in the table has a "Re:" version of this subject, and
194            // this container has a non-"Re:" version of this subject. The non-"Re:" version
195            // is the more interesting of the two.
196            if (old == null || c.threadable == null && old.threadable != null
197                    || old.threadable != null && old.threadable.subjectIsReply() && c.threadable != null && !c.threadable.subjectIsReply()) {
198                subjectTable.put(subj, c);
199                count++;
200            }
201        }
202
203        // If the table is empty, we're done
204        if (count == 0) {
205            return;
206        }
207
208        // subjectTable is now populated with one entry for each subject which occurs in the
209        // root set. Iterate over the root set, and gather together the difference.
210        NntpThreadContainer prev, c, rest;
211        for (prev = null, c = root.child, rest = c.next; c != null; prev = c, c = rest, rest = rest == null ? null : rest.next) {
212            Threadable threadable = c.threadable;
213
214            // is it a dummy node?
215            if (threadable == null) {
216                threadable = c.child.threadable;
217            }
218
219            final String subj = threadable.simplifiedSubject();
220
221            // Don't thread together all subjectless messages
222            if (subj == null || subj.isEmpty()) {
223                continue;
224            }
225
226            final NntpThreadContainer old = subjectTable.get(subj);
227
228            if (old == c) { // That's us
229                continue;
230            }
231
232            // We have now found another container in the root set with the same subject
233            // Remove the "second" message from the root set
234            if (prev == null) {
235                root.child = c.next;
236            } else {
237                prev.next = c.next;
238            }
239            c.next = null;
240
241            if (old.threadable == null && c.threadable == null) {
242                // both dummies - merge them
243                NntpThreadContainer tail;
244                for (tail = old.child; tail != null && tail.next != null; tail = tail.next) {
245                    // do nothing
246                }
247
248                if (tail != null) { // protect against possible NPE
249                    tail.next = c.child;
250                }
251
252                for (tail = c.child; tail != null; tail = tail.next) {
253                    tail.parent = old;
254                }
255
256                c.child = null;
257            } else if (old.threadable == null || c.threadable != null && c.threadable.subjectIsReply() && !old.threadable.subjectIsReply()) {
258                // Else if old is empty, or c has "Re:" and old does not ==> make this message a child of old
259                c.parent = old;
260                c.next = old.child;
261                old.child = c;
262            } else {
263                // else make the old and new messages be children of a new dummy container.
264                // We create a new container object for old.msg and empty the old container
265                final NntpThreadContainer newc = new NntpThreadContainer();
266                newc.threadable = old.threadable;
267                newc.child = old.child;
268
269                for (NntpThreadContainer tail = newc.child; tail != null; tail = tail.next) {
270                    tail.parent = newc;
271                }
272
273                old.threadable = null;
274                old.child = null;
275
276                c.parent = old;
277                newc.parent = old;
278
279                // Old is now a dummy- give it 2 kids , c and newc
280                old.child = c;
281                c.next = newc;
282            }
283            // We've done a merge, so keep the same prev
284            c = prev;
285        }
286
287        subjectTable.clear();
288        subjectTable = null;
289
290    }
291
292    /**
293     * Delete any empty or dummy ThreadContainers
294     *
295     * @param parent
296     */
297    private void pruneEmptyContainers(final NntpThreadContainer parent) {
298        NntpThreadContainer container, prev, next;
299        for (prev = null, container = parent.child, next = container.next; container != null; prev = container, container = next, next = container == null
300                ? null
301                : container.next) {
302
303            // Is it empty and without any children? If so,delete it
304            if (container.threadable == null && container.child == null) {
305                if (prev == null) {
306                    parent.child = container.next;
307                } else {
308                    prev.next = container.next;
309                }
310
311                // Set container to prev so that prev keeps its same value the next time through the loop
312                container = prev;
313            }
314
315            // Else if empty, with kids, and (not at root or only one kid)
316            else if (container.threadable == null && (container.parent != null || container.child.next == null)) {
317                // We have an invalid/expired message with kids. Promote the kids to this level.
318                NntpThreadContainer tail;
319                final NntpThreadContainer kids = container.child;
320
321                // Remove this container and replace with 'kids'.
322                if (prev == null) {
323                    parent.child = kids;
324                } else {
325                    prev.next = kids;
326                }
327
328                // Make each child's parent be this level's parent -> i.e. promote the children.
329                // Make the last child's next point to this container's next
330                // i.e. splice kids into the list in place of container
331                for (tail = kids; tail.next != null; tail = tail.next) {
332                    tail.parent = container.parent;
333                }
334
335                tail.parent = container.parent;
336                tail.next = container.next;
337
338                // next currently points to the item after the inserted items in the chain - reset that, so we process the newly
339                // promoted items next time round
340                next = kids;
341
342                // Set container to prev so that prev keeps its same value the next time through the loop
343                container = prev;
344            } else if (container.child != null) {
345                // A real message , with kids
346                // Iterate over the children
347                pruneEmptyContainers(container);
348            }
349        }
350    }
351
352    /**
353     * The client passes in a list of Iterable objects, and the Threader constructs a connected 'graph' of messages
354     *
355     * @param messages iterable of messages to thread, must not be empty
356     * @return null if messages == null or root.child == null or messages list is empty
357     * @since 3.0
358     */
359    public Threadable thread(final Iterable<? extends Threadable> messages) {
360        if (messages == null) {
361            return null;
362        }
363
364        HashMap<String, NntpThreadContainer> idTable = new HashMap<>();
365
366        // walk through each Threadable element
367        for (final Threadable t : messages) {
368            if (!t.isDummy()) {
369                buildContainer(t, idTable);
370            }
371        }
372
373        if (idTable.isEmpty()) {
374            return null;
375        }
376
377        final NntpThreadContainer root = findRootSet(idTable);
378        idTable.clear();
379        idTable = null;
380
381        pruneEmptyContainers(root);
382
383        root.reverseChildren();
384        gatherSubjects(root);
385
386        if (root.next != null) {
387            throw new IllegalStateException("root node has a next:" + root);
388        }
389
390        for (NntpThreadContainer r = root.child; r != null; r = r.next) {
391            if (r.threadable == null) {
392                r.threadable = r.child.threadable.makeDummy();
393            }
394        }
395
396        final Threadable result = root.child == null ? null : root.child.threadable;
397        root.flush();
398
399        return result;
400    }
401
402    /**
403     * The client passes in a list of Threadable objects, and the Threader constructs a connected 'graph' of messages
404     *
405     * @param messages list of messages to thread, must not be empty
406     * @return null if messages == null or root.child == null or messages list is empty
407     * @since 2.2
408     */
409    public Threadable thread(final List<? extends Threadable> messages) {
410        return thread((Iterable<? extends Threadable>) messages);
411    }
412
413    // DEPRECATED METHODS - for API compatibility only - DO NOT USE
414
415    /**
416     * The client passes in an array of Threadable objects, and the Threader constructs a connected 'graph' of messages
417     *
418     * @param messages array of messages to thread, must not be empty
419     * @return null if messages == null or root.child == null or messages array is empty
420     * @deprecated (2.2) prefer {@link #thread(List)}
421     */
422    @Deprecated
423    public Threadable thread(final Threadable[] messages) {
424        if (messages == null) {
425            return null;
426        }
427        return thread(Arrays.asList(messages));
428    }
429
430}