#!/usr/bin/env python
# -----------------------------------------------------------------------
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
# -----------------------------------------------------------------------

import os
import sys
import getopt

#
# This is a one-time-use utility to move your history and checkpoint files
# into the database.  It will not run if the database is already populated.
#
# If you need to run it again, you must first remove or drop your existing database.
#

from ducc_util import DuccUtil


class DbLoader(DuccUtil):
    """Command-line driver that runs the Java DbLoader against a DUCC runtime.

    The database is started before the load and stopped afterwards; the
    actual work is done by 'org.apache.uima.ducc.database.DbLoader', which is
    launched as an external java process.
    """

    def __init__(self):
        # NOTE(review): the meaning of the second argument (True) is defined
        # by DuccUtil.__init__, which is not visible here -- confirm there.
        DuccUtil.__init__(self, True)

    def usage(self, *args):
        """Print an optional message followed by the usage text, then exit.

        Parameters:
            *args: message fragments, joined with single spaces and printed
                   ahead of the usage text.  Pass None as the first argument
                   (or nothing at all) to print the usage text alone.

        This method never returns: it always terminates the process with
        exit status 1.
        """
        # Guard the index so that a call with no arguments behaves like
        # usage(None) instead of raising IndexError.
        if args and args[0] is not None:
            print('')
            print(' '.join(args))
            print('')

        help_lines = (
            'Usage:',
            '',
            'db_loader -i runtime [-t #threads] [--no-archive]',
            '',
            'Where:',
            ' -i runtime',
            ' Specifies the DUCC_HOME of the DUCC runtime to be moved into the database.',
            '',
            ' -t #threads',
            ' Specifies number of processing threads (default is 10)',
            '',
            ' --no-archive',
            ' Suppresses archival of the input files.',
            '',
            'Notes:',
            ' 1. Because this is a bootstrap script DUCC must be stopped.',
            ' The database will be started & stopped by this script.',
            ' 2. Reducing the number of processing threads will reduce the CPU load but',
            ' increase the processing time.',
            ' 3. Archival consists of renaming relevent input directories and files by appending',
            ' ".archive" to their names. It is fully non-destructive and may be reversed by',
            ' manually renaming them back to their original names.',
        )
        for line in help_lines:
            print(line)
        sys.exit(1)

    def main(self, argv):
        """Parse the command line, start the database and run the loader.

        Parameters:
            argv: the command-line arguments, without the program name.

        Any command-line problem is reported through usage(), which exits
        the process with status 1.  If the database cannot be contacted
        after being started, a message is printed and the method returns
        without running the loader.
        """
        in_home = None
        archive = True
        nthreads = '10'

        # Only option-parsing errors are turned into a usage message; anything
        # else (e.g. KeyboardInterrupt) is allowed to propagate.
        try:
            opts, args = getopt.getopt(argv, 'i:t:h?', ['no-archive'])
        except getopt.GetoptError:
            self.usage("Invalid arguments", ' '.join(argv))

        for (o, a) in opts:
            if o == '-i':
                in_home = a
            elif o == '-t':
                nthreads = a
            elif o == '--no-archive':
                archive = False
            else:
                # -h and -? land here: print the usage text and exit.
                self.usage(None)

        if in_home is None:
            self.usage("Missing input DUCC_HOME")

        # The thread count is passed to the JVM verbatim, so reject anything
        # that is not a positive integer before the database is started.
        if not nthreads.isdigit() or int(nthreads) < 1:
            self.usage("Invalid number of threads:", nthreads)

        out_url = self.ducc_properties.get('ducc.database.host')
        if out_url is None:
            self.usage("Cannot find 'ducc.database.host' in your properties file.")

        self.db_start()
        # NOTE(review): the argument to db_alive is presumably a retry count
        # or timeout -- confirm against DuccUtil.
        if not self.db_alive(3):
            print("Database is not running or cannot be contacted.")
            return

        # From here on the database is up, so make sure it is stopped again
        # even if launching the loader raises.
        try:
            jvm_opts = '-DDUCC_HOME=' + self.DUCC_HOME
            if not archive:
                jvm_opts = jvm_opts + ' -DDONT_ARCHIVE'

            command = ' '.join([
                self.java(),
                jvm_opts,
                'org.apache.uima.ducc.database.DbLoader',
                in_home,
                out_url,
                nthreads,
            ])

            # CLASSPATH may be absent from the environment; in that case the
            # resources directory becomes the whole classpath.
            resources = self.DUCC_HOME + "/resources"
            inherited = os.environ.get('CLASSPATH')
            if inherited is None:
                os.environ['CLASSPATH'] = resources
            else:
                os.environ['CLASSPATH'] = inherited + ':' + resources

            print('CLASSPATH ' + os.environ['CLASSPATH'])
            print('Executing ' + command)
            # The command is handed to the shell as one string, so arguments
            # containing spaces or shell metacharacters are not protected.
            os.system(command)
        finally:
            self.db_stop()


if __name__ == "__main__":
    console = DbLoader()
    console.main(sys.argv[1:])