#!/usr/bin/env python3

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Assign shepherds to podlings for an upcoming report cycle.

This script can only be run from a checkout of the Incubator's Subversion
repository.  Assignments are written out to
`content/shepherd_assignments.json`; once this script has been run, the
modified file must be committed.

The roster of active shepherds is maintained in the file
`content/shepherds.json`.

Rules by which shepherds are assigned:

*   Shepherds must not be Mentors for the podling.
*   Shepherds will be assigned a maximum of 3 podlings per cycle, or fewer if
    they choose.  If there are not enough shepherds to meet demand, some
    podlings will not receive shepherd assignments.
*   If possible, no shepherd should be assigned the same podling multiple
    times over the course of incubation.
*   Shepherds may specify a whitelist of podlings which they will accept.

"""

import sys
if sys.version_info < (3, 2):
    raise Exception("Python 3.2 or above is required")

import os
import re
import json
import pickle
import random
import datetime
import argparse
import xml.dom.minidom
from pprint import pprint

class Shepherd(object):
    """
    An Incubator Shepherd.

    Tracks the shepherd's identity, monthly capacity, optional podling
    whitelist, and every podling assigned to them, grouped by report date.
    """

    def __init__(self, apache_id, name=None, max_podlings=3, whitelist=None):
        """
        Return a Shepherd instance.

        *   apache_id -- The Shepherd's apache id.
        *   name -- Optional display name; stored but not otherwise used by
            this class.
        *   max_podlings -- Maximum podlings to review per month (coerced
            with `int()`).
        *   whitelist -- An optional list of acceptable podling IDs.  An
            empty or missing whitelist means "no restriction".
        """
        self._apache_id = apache_id
        self._max_podlings = int(max_podlings)
        self._name = name
        self._whitelist = set(whitelist) if whitelist else None
        # Maps a report date to the set of podling IDs assigned for it.
        self._assignments = {}

    def accept(self, date, podling_id):
        """Indicate whether the proposed assignment is acceptable."""
        return self._do_accept(date, podling_id, throw=False)

    def _do_accept(self, date, podling_id, throw):
        """
        Check the whitelist and capacity rules for a proposed assignment.

        Return True when acceptable.  Otherwise return False, or raise
        ValueError describing the violated rule when `throw` is true.
        """
        # If this shepherd has a whitelist, ensure that the podling is in it.
        if self._whitelist and podling_id not in self._whitelist:
            if throw:
                raise ValueError("Podling not in whitelist")
            return False

        # Ensure that the shepherd has capacity to accept an assignment.
        if self.podling_count(date) >= self._max_podlings:
            if throw:
                raise ValueError("Too many podling assignments this month")
            return False
        return True

    def assign(self, date, podling_id):
        """
        Attempt to assign a podling to the Shepherd for a specific report
        date.  Raise ValueError if the assignment is not acceptable.
        """
        self._do_accept(date=date, podling_id=podling_id, throw=True)
        self.force_assign(date, podling_id)

    def force_assign(self, date, podling_id):
        """
        Assign a podling to the shepherd without checking the whitelist or
        capacity.  Always succeeds.
        """
        self._assignments.setdefault(date, set()).add(podling_id)

    def podling_count(self, date):
        """
        Return the number of podlings that the Shepherd has been assigned
        for the given report date.
        """
        return len(self._assignments.get(date, ()))

    def has_tended(self, podling_id):
        """
        Indicate whether the shepherd has ever been assigned the specified
        podling, on any report date.
        """
        # Look inside the per-date sets of podling IDs.  Iterating the dict
        # itself would yield the date keys and turn the membership test into
        # a substring match against the date string.
        return any(podling_id in assigned
                   for assigned in self._assignments.values())

    def whitelisted(self, podling_id):
        """
        Indicate whether the shepherd has explicitly whitelisted the
        specified podling.  Always False for a shepherd with no whitelist.
        """
        return bool(self._whitelist) and podling_id in self._whitelist

    def get_apache_id(self):
        """Return the shepherd's apache id."""
        return self._apache_id

    def get_max_podlings(self):
        """Return the maximum number of podlings per report date."""
        return self._max_podlings

    @staticmethod
    def bulk_load(f):
        """
        Parse a JSON file stream and return a dict of (apache_id: Shepherd)
        pairs.

        The stream must contain a JSON array of objects; each object is
        passed as keyword arguments to the `Shepherd` constructor.
        """
        shepherds = {}
        for args in json.load(f):
            shepherd = Shepherd(**args)
            shepherds[shepherd.get_apache_id()] = shepherd
        return shepherds

class Report(object):
    """
    An Incubator report to the ASF Board of Directors for a given date.

    Holds the mapping of reporting podling IDs to their assigned Shepherd
    (or None when no shepherd could be assigned).
    """

    # Whole-string match: four digit year, a dash, and a month 01-12.
    _DATE_PATTERN = re.compile(r"[0-9]{4}-(0[1-9]|1[0-2])\Z")

    def __init__(self, date):
        """
        Return a Report instance.

        *   date -- A string of the format `YYYY-MM`.

        Raise ValueError if `date` is not exactly of that form or the month
        is outside 01-12.
        """
        # The pattern is anchored at the end so that trailing characters
        # (e.g. "2013-055") are rejected rather than silently parsed.
        if not self._DATE_PATTERN.match(date):
            raise ValueError("Invalid date")
        self._date = date
        self._month = int(date[5:])
        # Maps podling_id to a Shepherd, or to None when unassigned.
        self._assignments = {}

    def get_date(self):
        """Return the report date as a `YYYY-MM` string."""
        return self._date

    def assign(self, podling_id, shepherd):
        """
        Record `shepherd` (a Shepherd or None) as assigned to `podling_id`.

        Raise TypeError if `shepherd` is neither None nor a Shepherd, and
        ValueError if the podling already has an entry in this report.
        """
        if shepherd is not None and not isinstance(shepherd, Shepherd):
            raise TypeError("Not a Shepherd")
        if podling_id in self._assignments:
            raise ValueError("Podling " + podling_id + " already assigned")
        self._assignments[podling_id] = shepherd

    def shepherd(self, podling_id):
        """Return the Shepherd assigned to `podling_id`, if any."""
        return self._assignments.get(podling_id)

    def podlings(self):
        """Return the podlings reporting this cycle as a `set` of IDs"""
        return set(self._assignments)

    def _select_shepherd(self, podling, shepherds):
        """
        Pick a shepherd for `podling`, or return None if nobody can take it.

        Preference order: a shepherd who whitelisted the podling, then one
        who has never tended it, then anyone with remaining capacity.
        """
        podling_id = podling.get_id()

        # Exclude mentors and inactive shepherds (max_podlings of 0).
        candidates = [
            shep for shep in shepherds.values()
            if shep.get_max_podlings()
            and not podling.has_mentor(shep.get_apache_id())
        ]

        # Try to distribute podlings evenly amongst the shepherds: the
        # shuffle randomizes ties, and the stable sort then orders by load.
        random.shuffle(candidates)
        candidates.sort(key=lambda shep: shep.podling_count(self._date))

        # First, try to assign the podling to someone who's got it whitelisted.
        for shep in candidates:
            if shep.whitelisted(podling_id):
                if shep.accept(podling_id=podling_id, date=self._date):
                    return shep

        # Try to assign the podling to someone who hasn't shepherded it before.
        for shep in candidates:
            if shep.has_tended(podling_id):
                continue
            if shep.accept(podling_id=podling_id, date=self._date):
                return shep

        # Find someone who's got the time.
        for shep in candidates:
            if shep.accept(podling_id=podling_id, date=self._date):
                return shep

        # Nobody's available.
        return None

    def assign_shepherds(self, podlings, shepherds, reports):
        """
        Assign shepherds to this report.

        Every podling with a report due this month gets an entry; the entry
        is None when no shepherd was available.

        *   podlings -- a dict of (podling_id: Podling) pairs.
        *   shepherds -- a dict of (apache_id: Shepherd) pairs.
        *   reports -- a dict of ("YYYY-MM": Report) pairs.  Currently
            unused; kept so existing callers keep working.
        """
        # Process podlings in random order.
        shuffled = list(podlings.values())
        random.shuffle(shuffled)
        for podling in shuffled:
            if not podling.report_due(self._month):
                continue
            podling_id = podling.get_id()
            shep = self._select_shepherd(podling, shepherds)
            if shep is not None:
                shep.assign(date=self._date, podling_id=podling_id)
            self.assign(podling_id=podling_id, shepherd=shep)

    @staticmethod
    def bulk_load(shepherds, f):
        """
        Parse a JSON file stream and return a dict of ("YYYY-MM": Report)
        pairs.

        As a side effect, update `shepherds` by assigning podlings from past
        reports.

        *   shepherds: A dict of (apache_id: Shepherd) pairs.
        *   f: A readable file stream.

        Raise ValueError if a report key is not a valid `YYYY-MM` date.
        """
        data = json.load(f)
        reports = {}
        for date, assigned in data.items():
            report = reports[date] = Report(date=date)
            for podling_id, shepherd_id in assigned.items():
                if shepherd_id and shepherd_id not in shepherds:
                    # Add past shepherds to roster, but indicate that they are
                    # inactive by giving them max_podlings=0.
                    shepherds[shepherd_id] = Shepherd(apache_id=shepherd_id,
                                                      max_podlings=0)
                shepherd = shepherds[shepherd_id] if shepherd_id else None
                report.assign(podling_id=podling_id, shepherd=shepherd)
                if shepherd:
                    # Bypass capacity checks: history is recorded as-is.
                    shepherd.force_assign(date=date, podling_id=podling_id)
        return reports

    @staticmethod
    def bulk_dump(reports, f):
        """
        Write out a dict of ("YYYY-MM": Report) pairs to a JSON file stream,
        capturing shepherd assignments.

        Each report becomes an object mapping podling ID to the assigned
        shepherd's apache id, or null when there is no shepherd.
        """
        data = {}
        for report in reports.values():
            assigned = {}
            data[report.get_date()] = assigned
            for podling_id in report.podlings():
                shepherd = report.shepherd(podling_id)
                assigned[podling_id] = (shepherd.get_apache_id()
                                        if shepherd else None)
        json.dump(data, f, indent=4, sort_keys=True, separators=(",", ": "))

class Podling(object):
    """A podling in the Incubator, with its reporting schedule and mentors."""

    def __init__(self, podling_id, group, monthly):
        """
        Build a Podling.

        *   podling_id -- The resource identifier for the podling.
        *   group -- Reporting group (1, 2 or 3).
        *   monthly -- Whether podling currently reports monthly.
        """
        self._mentors = set()
        self._group = group
        self._monthly = monthly
        self._id = podling_id

    def add_mentor(self, mentor):
        """Record `mentor` as one of the podling's mentors."""
        self._mentors.add(mentor)

    def has_mentor(self, apache_id):
        """Tell whether `apache_id` is among the podling's mentors."""
        return apache_id in self._mentors

    def get_id(self):
        """Return the podling's string resource identifier."""
        return self._id

    def report_due(self, month):
        """
        Tell whether the podling owes a report in the given month number.

        Monthly reporters are always due; otherwise the month is mapped onto
        a quarterly slot (1, 2 or 3) and compared to the reporting group.
        """
        if self._monthly:
            return True
        quarter_slot = ((month - 1) % 3) + 1
        return quarter_slot == self._group

    @staticmethod
    def bulk_load(f):
        """
        Read a podlings.xml file stream and build a dict of
        (podling_id: Podling) pairs.

        Only `podling` elements whose `status` is 'current' are kept.  The
        ID is the `name` attribute, stripped, lower-cased, with spaces
        removed.  Raise ValueError when a current podling has no `reporting`
        element.
        """
        result = {}
        document = xml.dom.minidom.parse(f)
        for node in document.getElementsByTagName("podling"):
            if node.getAttribute("status") != 'current':
                continue

            raw_name = node.getAttribute("name")
            podling_id = raw_name.strip().lower().replace(' ', '')

            schedule = node.getElementsByTagName("reporting")
            if not schedule:
                raise ValueError("podlings.xml is missing 'reporting' for " + podling_id)
            first = schedule[0]
            # Any non-empty `monthly` attribute value counts as monthly.
            is_monthly = bool(first.getAttribute("monthly"))
            group_number = int(first.getAttribute("group"))

            entry = Podling(podling_id=podling_id, monthly=is_monthly,
                            group=group_number)
            result[podling_id] = entry

            for mentor_node in node.getElementsByTagName("mentor"):
                entry.add_mentor(mentor_node.getAttribute("username").strip())
        return result

def repos_root():
    """
    Locate the root of the Incubator version control checkout.

    This is taken to be the directory containing this script file.
    """
    script_path = os.path.abspath(__file__)
    return os.path.dirname(script_path)

def main():
    """
    Load podlings, shepherds and past assignments, assign shepherds for the
    requested report date, and rewrite the assignments file.

    Exits with status 0 without changing anything when assignments for the
    requested date already exist.
    """
    # Work out the target date and the locations of the data files.
    opts = process_cli_args()
    target_date = opts.date
    content = os.path.join(repos_root(), 'content')
    podlings_file = os.path.join(content, 'podlings.xml')
    roster_file = os.path.join(content, 'shepherds.json')
    assignments_file = os.path.join(content, 'shepherd_assignments.json')

    # Load the data.  Past reports are loaded last because loading them
    # also records the historical assignments on the shepherds.
    with open(podlings_file, 'r') as stream:
        podlings = Podling.bulk_load(f=stream)
    with open(roster_file, 'r') as stream:
        shepherds = Shepherd.bulk_load(f=stream)
    with open(assignments_file, 'r') as stream:
        reports = Report.bulk_load(shepherds=shepherds, f=stream)

    # Nothing to do if this month's assignments were already made.
    if target_date in reports:
        print("Assignments for {} already complete.".format(target_date))
        sys.exit(0)

    # Perform assignments and dump to `content/shepherd_assignments.json`.
    new_report = Report(date=target_date)
    reports[target_date] = new_report
    new_report.assign_shepherds(podlings=podlings,
                                shepherds=shepherds,
                                reports=reports)
    os.remove(assignments_file)
    with open(assignments_file, 'w') as stream:
        Report.bulk_dump(reports=reports, f=stream)
    print("Updated {}".format(assignments_file))

def process_cli_args(argv=None, now=None):
    """
    Parse command line options and derive the target report date.

    *   argv -- Optional list of argument strings; when None, argparse
        reads `sys.argv[1:]`.
    *   now -- Optional `datetime.datetime` used as "the current time";
        when None, `datetime.datetime.now()` is used.

    Return the argparse namespace with `year`, `month` and a `date` string
    of the format `YYYY-MM`.

    Defaults: a missing `--month` means the month after the current one.  A
    missing `--year` means the current year, or the next year when the
    chosen month is earlier than the current month.  An explicitly supplied
    `--year` is always used as given.

    Out-of-range values are reported through `parser.error`, which prints a
    usage message and exits.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--year', type=int, default=0,
                        help="4 digit year number")
    parser.add_argument('--month', type=int, default=0,
                        help="month number (1-12)")
    options = parser.parse_args(argv)
    if now is None:
        now = datetime.datetime.now()

    # 0 is the "not supplied" sentinel for both options.
    if options.month == 0:
        options.month = (now.month % 12) + 1
    elif not 1 <= options.month <= 12:
        parser.error("--month must be between 1 and 12")

    if options.year == 0:
        options.year = now.year
        # Only infer a year rollover when the year was not given; an
        # explicit --year must never be overridden.
        if options.month < now.month:
            options.year += 1
    elif not 1 <= options.year <= 9999:
        parser.error("--year must be a 4 digit year number")

    options.date = "{0:04d}-{1:02d}".format(options.year, options.month)
    return options

# Run the assignment workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
