#! /usr/bin/env python3

"""
Generate 'svn log' output since the last tag to HEAD of a stable branch,
filtering all but merge commits.

If the current working directory is the root of a stable branch (e.g.,
`svn info --show-item=relative-url` == '^/subversion/branches/1.12.x'),
show changes to that branch; else, show changes to the newest branch.
"""

import collections
import datetime
import os
import re
import subprocess
import sys
import tempfile

import xml.etree.ElementTree as ET

# Name or path of the svn client to run; the SVN environment variable overrides it.
SVN = os.getenv('SVN', 'svn')
# A line of 72 dashes, printed by main() before each log entry and after the last one.
LOG_SEPARATOR_LINE = ('-' * 72) + '\n'
# Directory that versions_on_dist_release() lists to find the released tarballs.
DIST_RELEASE_URL = 'https://dist.apache.org/repos/dist/release/subversion'
# Prefix prepended to '/subversion/...' paths to form tag and branch URLs.
REPOS_ROOT_URL = 'https://svn.apache.org/repos/asf'

# A release number.  Being a tuple, instances compare in (major, minor, patch)
# order and can be sliced, e.g. v[:2] is the minor line.
Version = collections.namedtuple('Version', ['major', 'minor', 'patch'])


def _format_version(version):
    """Render VERSION in dotted form, e.g. '1.12.0'."""
    return '{0}.{1}.{2}'.format(*version)


Version.__str__ = _format_version

def versions_on_dist_release():
    """Return a tuple of Version objects, one for each release currently
    available for download.

    The releases are discovered by listing DIST_RELEASE_URL with 'svn ls' and
    picking out the 'subversion-X.Y.Z.tar.gz.asc' entries; names that do not
    have exactly that shape (such as pre-release artifacts) are skipped.
    """
    listing = subprocess.check_output([SVN, 'ls', '--', DIST_RELEASE_URL]).decode()
    signature_name = re.compile(r'^subversion-(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)\.tar\.gz\.asc$')

    found = []
    for entry in listing.splitlines():
        hit = signature_name.match(entry)
        if hit is None:
            continue  # not a GA release signature
        # The regex captures digit strings; Version wants integers.
        numbers = {name: int(digits) for name, digits in hit.groupdict().items()}
        found.append(Version(**numbers))
    return tuple(found)

def get_reference_version__from_working_copy():
    """Get the reference version from the stable branch checked out in cwd.

    The working copy's own version is obtained by running build/getversion.py
    on subversion/include/svn_version.h.  The reference version is then the
    newest patch release that is:
    - on dist;
    - within the working copy's minor line;
    - older than the working copy.

    Raises ValueError if no release on dist/ meets all three conditions, e.g.
    when the working copy is older than the oldest patch version still on
    dist/.
    """
    ### Doesn't work during the alpha/beta/rc phase; works only after 1.A.0 has been tagged
    version_string = subprocess.check_output(['build/getversion.py', 'SVN', 'subversion/include/svn_version.h']).decode()
    wc_version = Version(*map(int, version_string.split('.')))

    candidates = [v for v in versions_on_dist_release()
                  if v[:2] == wc_version[:2] and v < wc_version]
    if not candidates:
        # A bare max() would raise ValueError here too, but with the opaque
        # message "max() arg is an empty sequence"; say what actually happened.
        raise ValueError(
            "no release on dist/ is both within the {0}.{1}.x line and older "
            "than the working copy's version ({0}.{1}.{2})".format(*wc_version))
    return max(candidates)

def get_reference_version__latest_stable_release():
    """Return the version number of the latest stable release.

    More precisely, return the oldest patch version on dist/ within the
    newest minor line found there.

    Raises ValueError if dist/ lists no GA release at all.
    """
    # Get the available GA releases.
    versions = set(versions_on_dist_release())
    if not versions:
        raise ValueError("no GA releases found on dist/")

    # Compute the newest minor line once, rather than once per candidate.
    newest_minor_line = max(versions)[:2]

    # Normally dist/ would have only one version for each minor line.  If there
    # are two patch versions, we err on the side of showing too much rather
    # than too little.  (The newer patch version might not have been announced
    # yet.)
    oldest_patch_version_within_newest_minor_line = min(
        v for v in versions if v[:2] == newest_minor_line)
    return oldest_patch_version_within_newest_minor_line

# Note: This function is also called by ./generate-upcoming-changes-log.sh.
def get_reference_version():
    """Return the version to use as the oldest end of the 'svn log' output to
    generate.

    If cwd contains subversion/include/svn_version.h it is treated as a
    working copy and the version is derived from it; otherwise the latest
    stable release is used.
    """
    if not os.path.exists('subversion/include/svn_version.h'):
        return get_reference_version__latest_stable_release()
    return get_reference_version__from_working_copy()

def copyfrom_revision_of_previous_tag_of_this_stable_branch(reference_version):
    """Return, as an int, the copyfrom revision of the REFERENCE_VERSION tag.

    REFERENCE_VERSION must be a Version.  The revision is read from the
    '(from PATH:REV)' annotation in the verbose 'svn log' output for the tag.
    """
    assert isinstance(reference_version, Version)

    dotted = '.'.join(str(component) for component in reference_version)
    tag_url = REPOS_ROOT_URL + '/subversion/tags/' + dotted

    # Ask for a single log entry of the tag, newest first, with its changed
    # paths (-v), and without following history past the copy.
    command = [SVN, 'log', '-q', '-v', '-l1', '-rHEAD:0', '--stop-on-copy', '--', tag_url + '@']
    log_text = subprocess.check_output(command).decode()

    copyfrom = re.search(r'[(]from \S*:(\d+)[)]', log_text)
    return int(copyfrom.group(1))

def get_merges_for_range(start, end, target_fspath):
    """Yield the <logentry> elements of the revisions in the range
    -r START:END that are merges.

    TARGET_FSPATH is the path of the branch within the repository (as it
    appears in the changed-paths list of 'svn log -v').  It is appended to
    REPOS_ROOT_URL, pegged at END, and passed to 'svn log'.

    A revision counts as a merge when TARGET_FSPATH itself is one of its
    changed paths.

    This is a generator: 'svn log' is not run until iteration starts.
    """
    log_output = \
        subprocess.check_output(
            [SVN, 'log', '--xml', '-v', '-r', str(start) + ":" + str(end),
             '--', REPOS_ROOT_URL + target_fspath + "@" + str(end)],
        ).decode()
    log_xml = ET.fromstring(log_output)

    for logentry in log_xml.findall('./logentry'):
        changed_paths = (path.text for path in logentry.findall('.//path'))
        if target_fspath in changed_paths:
            yield logentry

def main():
    """Print the merge commits made to the stable branch since its reference
    tag, laid out like the output of plain 'svn log'."""
    reference_version = get_reference_version()
    # Start just after the revision the tag was copied from.
    start_revision = copyfrom_revision_of_previous_tag_of_this_stable_branch(reference_version) + 1
    target_fspath = '/subversion/branches/{major}.{minor}.x'.format(**reference_version._asdict())
    print("Changes in " + '^'+target_fspath + ":")
    for logentry in get_merges_for_range(start_revision, "HEAD", target_fspath):
        def child_text(tag):
            """Get the contents of the first child tag whose name is given as an argument."""
            return logentry.findall('./' + tag)[0].text

        print(LOG_SEPARATOR_LINE, end='')
        # ElementTree reports an empty <msg/> as None rather than ''; treat
        # it as an empty message instead of crashing on None.splitlines().
        message = child_text('msg') or ''
        timestamp = datetime.datetime.strptime(child_text('date'), '%Y-%m-%dT%H:%M:%S.%fZ')
        print("r%(revision)s | %(author)s | %(date)s | %(linecount)s lines" % dict(
            revision  = logentry.attrib['revision'],
            author    = child_text('author'),
            date      = timestamp.strftime('%Y-%m-%d %H:%M:%S +0000 (%a, %d %b %Y)'),
            linecount = 1+len(message.splitlines()), # increment because of the empty line printed next
        ))
        print()
        print(message)

    print(LOG_SEPARATOR_LINE, end='')

# Generate the log only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
