#! /usr/bin/env python

# -----------------------------------------------------------------------
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# -----------------------------------------------------------------------

# -------------------------------------------------------------------------------------
# Updates an existing ducc installation (1.1.0 or newer) from a binary tar.gz build
# Updates in place so preserves all the site-specific files
# Archives the current build in a timestamped directory under DUCC_HOME/../ducc_archives
# - checks that ducc is not running
# - creates a site.ducc.properties file if updating from DUCC 1.1.0
# - creates a time-stamped archive directory to hold the current build
# - archives and then updates existing directories,
# - keeps the directories: logs state history
# - keeps any other files in directories: resources lib
# - keeps the customized resources files: site.ducc.properties ducc.classes ducc.nodes jobdriver.nodes ducc.administrators
# - rebuilds the non-privileged ducc_ling
#
# Note: any files added to directories other than resources & lib will not be retained, but will be archived
#
# To revert back to an archived build:
# - copy/move all of the archived files back to the runtime
# -------------------------------------------------------------------------------------

import os
import sys
import datetime
import fnmatch
import re
import shutil


def usage():
    """Print the command-line help for ducc_update to stdout."""
    # Single-argument print(...) calls emit the same text under Python 2 and 3.
    print("Usage: ducc_update ducc-runtime ducc-binary-tar")
    print("")
    print(" Updates the DUCC installed at 'ducc-runtime' with the DUCC build in 'ducc-binary-tar'")
    print(" - checks that DUCC is not running")
    print(" - creates a site.ducc.properties file if updating from DUCC 1.1.0")
    print(" - creates a time-stamped archive directory to hold the current build")
    print(" - archives and then updates existing directories,")
    print(" - keeps the directories: logs state history")
    print(" - keeps any other files in directories: resources lib")
    print(" - keeps the customized resources files: site.ducc.properties ducc.classes ducc.nodes jobdriver.nodes ducc.administrators")
    print(" - rebuilds the non-privileged ducc_ling")
    print(" Note: any files added to directories other than resources & lib will not be retained, but will be archived")
    print("")
    print(" To revert back to an archived build:")
    print(" - copy/move all of the archived files back to the runtime")


#-----------------------------------------------------------------------------------------
# Get the version of the build from the name of the cli jar
#-----------------------------------------------------------------------------------------
def get_oversion(runtime):
    """Return the version string of the DUCC installed under 'runtime'.

    The version is the part of the file name between 'uima-ducc-cli-' and
    '.jar' for the first matching jar listed in <runtime>/lib/uima-ducc.

    Raises Exception if no such jar exists or its name cannot be parsed.
    (os.listdir raises OSError if the lib/uima-ducc directory is missing.)
    """
    fns = fnmatch.filter(os.listdir(runtime + '/lib/uima-ducc'), 'uima-ducc-cli-*.jar')
    if not fns:
        raise Exception("Not a valid DUCC installation - missing versioned cli jar")
    m = re.match(r'uima\-ducc\-cli\-([a-zA-Z0-9_\.\-]+)\.jar', fns[0])
    if m is None:
        raise Exception("Not a valid DUCC installation - invalid name: " + fns[0])
    return m.group(1)


#-----------------------------------------------------------------------------------------
# Get the version of the build from the name of the tar file
#-----------------------------------------------------------------------------------------
def get_nversion(tarfile):
    """Return the version embedded in the name of a DUCC binary tarball.

    tarfile -- base name of the tar file (no directory part); it must have
               the form uima-ducc-<version>-bin.tar.gz

    Raises Exception if the name does not have that form.
    """
    m = re.match(r'^uima\-ducc\-([a-zA-Z0-9_\.\-]+)\-bin\.tar\.gz$', tarfile)
    if m is None:
        raise Exception("Invalid tar file name: " + tarfile + " ... expecting: uima-ducc-<version>-bin.tar.gz")
    return m.group(1)


#-----------------------------------------------------------------------------------------
# Clear out the old expanded tarball if needed, and expand the new one
#-----------------------------------------------------------------------------------------
def expand_tarball(tarfile, nversion, outdir):
    """Extract 'tarfile' into 'outdir' and return the path of the extracted tree.

    The extract is expected at <outdir>/apache-uima-ducc-<nversion>; any
    existing directory of that name is removed first.

    Raises Exception if the old extract cannot be removed, if the tar
    command returns a non-zero status, or if the expected directory is not
    present afterwards.
    """
    extract = os.path.join(outdir, 'apache-uima-ducc-' + nversion)
    if os.path.exists(extract):
        try:
            shutil.rmtree(extract)
        except OSError as e:
            raise Exception("Cannot remove old tarball extract: " + extract + " (" + str(e) + ")")

    # The umask is set in the same shell so that it applies to the tar process
    cmd = 'umask 022 && tar -C ' + outdir + ' -xf ' + tarfile
    rc = os.system(cmd)
    if rc != 0:
        raise Exception("Command fails, rc= " + str(rc) + "\n " + cmd)

    if not os.path.exists(extract):
        raise Exception("Cannot extract runtime from tarball " + tarfile + " Expecting to find " + extract)

    return extract


#-----------------------------------------------------------------------------------------
# Convert final comments into a DUCC 1.1.0 properties file
#-----------------------------------------------------------------------------------------
def create_110_properties(source, dest):
    """Write the properties embedded as trailing comments in 'source' to 'dest'.

    Every line of 'source' that follows the first line starting with the
    marker text is written to 'dest' with its first character replaced by
    'ducc.'.  The marker line itself is not written.

    If 'source' contains no marker line, 'dest' is not created and nothing
    is raised, so the caller can detect the failure by testing for 'dest'.
    """
    outf = None
    try:
        with open(source) as f:
            for line in f:
                if outf is None:
                    # Still looking for the marker; open the output once found
                    if line.startswith("#=====MARKER====="):
                        outf = open(dest, 'w')
                else:
                    outf.write('ducc.' + line[1:])
    finally:
        # Close only if it was opened, i.e. only if the marker was found
        if outf is not None:
            outf.close()


#-----------------------------------------------------------------------------------------
# Update directory by archiving all files that are replaced, unless in exclude list
# Optionally rename new files that don't replace existing files
#-----------------------------------------------------------------------------------------
def update_dir(olddir, newdir, archive, excludeFiles, rename):
    """Move the entries of 'newdir' into 'olddir', archiving what they replace.

    olddir       -- directory being updated
    newdir       -- directory holding the new entries; entries are moved out
                    of it with os.rename
    archive      -- existing directory that receives replaced entries
    excludeFiles -- names that must not be replaced when present in olddir
    rename       -- if true, an excluded entry whose new version differs from
                    the current one has the new version saved in olddir as
                    <name>-new

    For each entry of newdir:
      - excluded and present in olddir: the current entry is kept (the new
        one stays in newdir unless it was saved as <name>-new)
      - excluded and absent from olddir: the new entry is moved in
      - otherwise: any current entry is moved to 'archive', then the new
        entry is moved in
    """
    import filecmp  # needed only here, so imported locally

    for f in os.listdir(newdir):
        curf = os.path.join(olddir, f)
        srcf = os.path.join(newdir, f)
        if f in excludeFiles:
            if os.path.exists(curf):
                # Byte-wise comparison; anything that is not a regular file
                # compares as different
                if rename and not filecmp.cmp(srcf, curf, shallow=False):
                    os.rename(srcf, curf + '-new')
                    print("Keeping " + f + " (new version saved as " + f + "-new)")
            else:
                print("Adding " + f)
                os.rename(srcf, curf)
        else:
            if os.path.exists(curf):
                print("Replacing " + f)
                os.rename(curf, os.path.join(archive, f))
            else:
                print("Adding " + f)
            os.rename(srcf, curf)


#-----------------------------------------------------------------------------------------
# Main program
#-----------------------------------------------------------------------------------------
def main():
    """Update the DUCC runtime named by argv[1] from the tarball named by argv[2].

    Exits with status 1 for bad arguments or an unsuitable runtime, and with
    status 9 when the migration is unsupported or one of its steps fails.
    """
    if len(sys.argv) < 3:
        usage()
        sys.exit(1)

    runtime = sys.argv[1]
    tarfile = sys.argv[2]

    # Check if appears to be a valid (stopped) DUCC installation
    runtime = os.path.realpath(runtime)
    if not os.path.exists(os.path.join(runtime, 'resources/ducc.properties')):
        print("ERROR - Not a valid DUCC runtime directory: " + runtime)
        sys.exit(1)
    if os.path.exists(os.path.join(runtime, 'state/ducc.pids')):
        print("ERROR - DUCC appears to be running ... please run 'stop_ducc -a'")
        sys.exit(1)

    if not os.path.exists(tarfile):
        print("ERROR - Missing tar file " + tarfile)
        sys.exit(1)

    oversion = get_oversion(runtime)
    if oversion == '1.0.0':
        print("Sorry, migration not supported for DUCC 1.0.0 at present")
        sys.exit(9)

    nversion = get_nversion(os.path.basename(tarfile))

    #-------------------------------------------------------------------------------------
    # Create archive directory
    #-------------------------------------------------------------------------------------
    runtimeParent, runtimeName = os.path.split(runtime)
    nowstr = datetime.datetime.now().strftime('%Y%m%d-%H%M')
    archiveParent = os.path.join(runtimeParent, 'ducc_archives')
    archive = os.path.join(archiveParent, runtimeName + '_' + nowstr)
    if not os.path.exists(archive):
        os.makedirs(archive)
    print(" --- Updating DUCC to " + nversion + "  and archiving " + oversion + " to " + archive)
    print(" NOTE: this update may be reversed by copying back all of the archived files,")
    print(" e.g. cp --recursive --remove-destination " + archive + "/*" + " " + runtime)

    #-------------------------------------------------------------------------------------
    # Expand tarball to a peer of the runtime so can move directories
    #-------------------------------------------------------------------------------------
    newducc = expand_tarball(tarfile, nversion, archiveParent)

    #-------------------------------------------------------------------------------------
    # May need to create the new webserver directory for 2.0
    #-------------------------------------------------------------------------------------
    weblogdir = os.path.join(runtime, 'logs/webserver')
    if not os.path.exists(weblogdir):
        os.makedirs(weblogdir)

    #-------------------------------------------------------------------------------------
    # Create a site.ducc.properties file if missing ... only for DUCC 1.1.0
    #-------------------------------------------------------------------------------------
    siteProps = os.path.join(runtime, 'resources/site.ducc.properties')
    if not os.path.exists(siteProps):
        if oversion != '1.1.0':
            print("Missing site.ducc.properties - can only be created for 1.1.0")
            sys.exit(9)
        currentProps = os.path.join(runtime, 'resources/ducc.properties')
        originalProps = os.path.join(runtime, 'resources/ducc-1.1.0.properties')
        # The original 1.1.0 properties are the comments at the end of this script
        create_110_properties(os.path.realpath(sys.argv[0]), originalProps)
        if not os.path.exists(originalProps):
            print("ERROR - Failed to create the 1.1.0 properties file from the ending comments in this script")
            sys.exit(9)
        # Use the new props manager - use abs fnames as otherwise are relative to the deduced DUCC_HOME/resources
        cmd = newducc + '/admin/ducc_props_manager --delta ' + originalProps + ' --with ' + currentProps + ' --to ' + siteProps
        rc = os.system(cmd)
        if rc != 0:
            print("ERROR " + str(rc) + " Failed to create " + siteProps)
            sys.exit(9)
        print(" --- Created a file with just the site-specific properties: " + siteProps)

    #-------------------------------------------------------------------------------------
    # Add or replace (after archiving) all directories in the new build EXCEPT resources & lib
    # Note that the history, logs, & state directories are not part of a build so are left unchanged
    #-------------------------------------------------------------------------------------
    print(" --- Processing " + runtimeName + " folder:")
    update_dir(runtime, newducc, archive, ['resources', 'lib'], False)

    #-------------------------------------------------------------------------------------
    # Add or replace (after archiving) all files in the lib directory
    # This ensures that any site local jars remain
    #-------------------------------------------------------------------------------------
    print(" --- Processing lib folder:")
    libarchive = os.path.join(archive, 'lib')
    if not os.path.exists(libarchive):
        os.mkdir(libarchive)
    update_dir(os.path.join(runtime, 'lib'), os.path.join(newducc, 'lib'), libarchive, [], False)

    #-------------------------------------------------------------------------------------
    # Add or replace (after archiving) some of the files in resources
    # Don't change any that are site-specific
    # (The nodes files should not be in the new build, but just in case ...)
    #-------------------------------------------------------------------------------------
    print(" --- Processing resources folder:")
    resarchive = os.path.join(archive, 'resources')
    if not os.path.exists(resarchive):
        os.mkdir(resarchive)
    preserveFiles = ['ducc.classes', 'ducc.administrators', 'ducc.nodes', 'jobdriver.nodes']
    update_dir(os.path.join(runtime, 'resources'), os.path.join(newducc, 'resources'), resarchive, preserveFiles, True)

    #-------------------------------------------------------------------------------------
    # Delete what's left of the extract (just the resources & lib folders)
    #-------------------------------------------------------------------------------------
    shutil.rmtree(newducc)

    #-------------------------------------------------------------------------------------
    # Re-build ducc_ling
    # Since it needs ducc.properties run the merge from the admin directory
    #-------------------------------------------------------------------------------------
    print(" ---")
    print(" --- Rebuilding ducc_ling")
    os.chdir(runtime + '/admin')
    rc = os.system('./ducc_props_manager --merge ../resources/default.ducc.properties --with ../resources/site.ducc.properties --to ../resources/ducc.properties')
    if rc != 0:
        print("ERROR - failed to create ducc.properties and to rebuild ducc_ling")
        sys.exit(9)
    rc = os.system('./build_duccling')
    if rc != 0:
        print("ERROR - failed to rebuild ducc_ling")
        sys.exit(9)

    print("")
    print(" >>> Update completed!")
    print(" NOTE - if your ducc_ling is privileged you should update it")


if __name__ == "__main__":
    main()

#->->->->->->-> DO NOT CHANGE ANYTHING BELOW THIS MARKER <-<-<-<-<-<-<-
#=====MARKER===== The following are the original ducc.properties shipped with DUCC 1.1.0
#head=
#jvm=
#cluster.name=Apache UIMA-DUCC
#private.resources=${DUCC_HOME}/resources.private
#jms.provider=activemq
#broker.protocol=tcp
#broker.hostname=${ducc.head}
#broker.port=61617
#broker.url.decoration=jms.useCompression=true
#broker.name=localhost
#broker.jmx.port=1100
#broker.credentials.file=${ducc.private.resources}/ducc-broker-credentials.properties
#broker.automanage=true
#broker.memory.options=-Xmx1G
#broker.configuration=conf/activemq-ducc.xml
#broker.home=${DUCC_HOME}/apache-uima/apache-activemq
#broker.server.url.decoration=transport.soWriteTimeout=45000
#locale.language=en
#locale.country=us
#node.min.swap.threshold=0
#admin.endpoint=ducc.admin.channel
#admin.endpoint.type=topic
#jmx.port=2099
#agent.jvm.args=-Xmx500M
#orchestrator.jvm.args=-Xmx1G
#rm.jvm.args=-Xmx1G
#pm.jvm.args=-Xmx1G
#sm.jvm.args=-Xmx1G
#db.jvm.args=-Xmx2G
#ws.jvm.args=-Xmx2G -Djava.util.Arrays.useLegacyMergeSort=true
#environment.propagated=USER HOME LANG
#cli.httpclient.sotimeout=0
#signature.required=on
#db.configuration.class=org.apache.uima.ducc.db.config.DbComponentConfiguration
#db.state.update.endpoint=ducc.db.state
#db.state.update.endpoint.type=topic
#db.state.publish.rate=15000
#ws.configuration.class=org.apache.uima.ducc.ws.config.WebServerConfiguration
#ws.port=42133
#ws.port.ssl=42155
#ws.session.minutes=60
#ws.automatic.cancel.minutes=5
#ws.max.history.entries=4096
#ws.jsp.compilation.directory=/tmp/ducc/jsp
#ws.login.enabled=false
#ws.visualization.strip.domain=true
#jd.configuration.class=org.apache.uima.ducc.jd.config.JobDriverConfiguration
#jd.state.update.endpoint=ducc.jd.state
#jd.state.update.endpoint.type=topic
#jd.state.publish.rate=15000
#jd.queue.prefix=ducc.jd.queue.
#jd.queue.timeout.minutes=5
#jd.host.class=JobDriver
#jd.host.description=Job Driver
#jd.host.memory.size=2GB
#jd.host.number.of.machines=1
#jd.host.user=System
#jd.share.quantum=400
#threads.limit=500
#driver.jvm.args=-Xmx300M
#sm.configuration.class=org.apache.uima.ducc.sm.config.ServiceManagerConfiguration
#sm.state.update.endpoint=ducc.sm.state
#sm.state.update.endpoint.type=topic
#sm.default.monitor.class=org.apache.uima.ducc.cli.UimaAsPing
#sm.instance.failure.max=5
#sm.instance.failure.limit=${ducc.sm.instance.failure.max}
#sm.instance.failure.window=30
#sm.init.failure.limit=1
#sm.meta.ping.rate=60000
#sm.meta.ping.stability=10
#sm.meta.ping.timeout=15000
#sm.http.port=19989
#sm.http.node=${ducc.head}
#sm.default.linger=300000
#orchestrator.configuration.class=org.apache.uima.ducc.orchestrator.config.OrchestratorConfiguration
#orchestrator.start.type=warm
#orchestrator.state.update.endpoint=ducc.orchestrator.state
#orchestrator.state.update.endpoint.type=topic
#orchestrator.state.publish.rate=10000
#orchestrator.abbreviated.state.update.endpoint=ducc.orchestrator.abbreviated.state
#orchestrator.abbreviated.state.update.endpoint.type=topic
#orchestrator.abbreviated.state.publish.rate=10000
#orchestrator.maintenance.rate=60000
#orchestrator.http.port=19988
#orchestrator.http.node=${ducc.head}
#orchestrator.unmanaged.reservations.accepted=true
#rm.configuration.class=org.apache.uima.ducc.rm.config.ResourceManagerConfiguration
#rm.state.update.endpoint=ducc.rm.state
#rm.state.update.endpoint.type=topic
#rm.state.publish.rate=10000
#rm.share.quantum=1
#rm.scheduler=org.apache.uima.ducc.rm.scheduler.NodepoolScheduler
#rm.class.definitions=ducc.classes
#rm.default.memory=4
#rm.init.stability=2
#rm.node.stability=5
#rm.eviction.policy=SHRINK_BY_INVESTMENT
#rm.initialization.cap=1
#rm.expand.by.doubling=true
#rm.prediction=true
#rm.prediction.fudge=120000
#rm.fragmentation.threshold=8
#rm.admin.endpoint=ducc.rm.admin.channel
#rm.admin.endpoint.type=queue
#agent.configuration.class=org.apache.uima.ducc.agent.config.AgentConfiguration
#agent.request.endpoint=ducc.agent
#agent.request.endpoint.type=topic
#agent.managed.process.state.update.endpoint=ducc.managed.process.state.update
#agent.managed.process.state.update.endpoint.type=socket
#agent.managed.process.state.update.endpoint.params=transferExchange=true&sync=false
#agent.node.metrics.sys.gid.max=500
#agent.node.metrics.publish.rate=30000
#agent.node.metrics.endpoint=ducc.node.metrics
#agent.node.metrics.endpoint.type=topic
#agent.node.inventory.publish.rate=10000
#agent.node.inventory.publish.rate.skip=30
#agent.node.inventory.endpoint=ducc.node.inventory
#agent.node.inventory.endpoint.type=topic
#agent.launcher.thread.pool.size=10
#agent.launcher.use.ducc_spawn=true
#agent.launcher.ducc_spawn_path=${DUCC_HOME}/admin/ducc_ling
#agent.launcher.process.stop.timeout=60000
#agent.launcher.process.init.timeout=7200000
#agent.rogue.process.user.exclusion.filter=
#agent.rogue.process.exclusion.filter=sshd:,-bash,-sh,/bin/sh,/bin/bash,grep,ps
#agent.share.size.fudge.factor=5
#agent.launcher.cgroups.enable=false
#agent.launcher.cgroups.utils.dir=/usr/bin,/bin
#agent.exclusion.file=${DUCC_HOME}/resources/exclusion.nodes
#pm.configuration.class=org.apache.uima.ducc.pm.config.ProcessManagerConfiguration
#pm.request.endpoint=ducc.pm
#pm.request.endpoint.type=queue
#pm.state.update.endpoint=ducc.pm.state
#pm.state.update.endpoint.type=topic
#pm.state.publish.rate=15000
#uima-as.configuration.class=org.apache.uima.ducc.agent.deploy.uima.UimaAsServiceConfiguration
#uima-as.endpoint=ducc.job.managed.service
#uima-as.endpoint.type=socket
#uima-as.endpoint.params=transferExchange=true&sync=false
#uima-as.saxon.jar.path=file:${DUCC_HOME}/apache-uima/saxon/saxon8.jar
#uima-as.dd2spring.xsl.path=${DUCC_HOME}/apache-uima/bin/dd2spring.xsl
#flow-controller.specifier=org.apache.uima.ducc.common.uima.DuccJobProcessFC