# -----------------------------------------------------------------------
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
# -----------------------------------------------------------------------

# WARNING: DO NOT EDIT THIS FILE.
# All customizations must be created in a file "site.ducc.properties" that is in
# your ducc_runtime/resources.  DUCC's startup procedures will merge this file and
# your site.ducc.properties into ducc.properties, which is what DUCC will use.
# WARNING: DO NOT EDIT THIS FILE.

# ====================================================================================
# This configuration file contains most of the specifications for DUCC.
#
# Two other important customizable files define the classes and nodes.
#
# The class definition file is specified by the property 'ducc.rm.class.definitions'
# and a sample called ducc.classes has been provided in DUCC_HOME/resources.
#
# Node definitions are by default taken from ducc.nodes but this may be overridden
# with the -n option on start_ducc.
#
# The first two entries here are customized in site.ducc.properties by ducc_post_install.
#
# ====================================================================================

# +==================================================================================================+
# | General                                                                                          |
# +==================================================================================================+

# The name of the node where DUCC runs.
# This property declares the node where the DUCC administrative processes run (Orchestrator,
# Resource Manager, Process Manager, Service Manager).  This property is required and MUST be
# configured in a new installation.  The installation script ducc_post_install initializes this
# property to the node the script is executed on.
# Reliable DUCC: if running reliably, then this value must resolve to the same ip address
# specified for the virtual_ipaddress in /etc/keepalived/keepalived.conf for master and
# backup nodes.  To avoid nameserver glitches, consider specifying the ip address, not the
# host name.  DUCC CLI and Agents employ this value to connect to the current reliable
# DUCC head node.
ducc.head =

# Reliable DUCC: if running reliably, then this value must comprise the blank-delimited list
# of nodes that are eligible to become the DUCC head node.  Admin commands start_ducc and
# stop_ducc are only allowed on the ducc.head node or any node in the ducc.head.reliable.list.
# An empty ducc.head.reliable.list indicates that DUCC is not running reliably.
ducc.head.reliable.list =
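# Illustrative example only (hypothetical address and host names): a reliable DUCC
# configuration in which ducc.head is the keepalived virtual ip address shared by the
# master and backup head nodes, both of which are eligible to run the head daemons.
#
#   ducc.head = 192.168.10.5
#   ducc.head.reliable.list = ducc-head-a.example.com ducc-head-b.example.com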
# The full name of the Java command.
# This specifies the full path to the JVM to be used by the DUCC processes.  This MUST be
# configured.  The installation script ducc_post_install initializes this property to the
# full path to java in the installer's environment.  (If the java command cannot be found,
# ducc_post_install exits with an error.)
ducc.jvm =

# The name of the cluster as shown by the Web Server.
# This is a string used in the Web Server banner to identify the local cluster.  It is used
# for informational purposes only and may be set to anything desired.
ducc.cluster.name=Apache UIMA-DUCC

# Specify location of private resources directory.  UIMA-3892
ducc.private.resources = ${DUCC_HOME}/resources.private

# Location of the security home directory.  When non-empty the userid is appended and it replaces
# the default location of the keys used in request validation.
ducc.security.home =

# Name any site-local jars.
# This may be used to list site-specific jars that are required by local
# customizations, e.g. authentication.  The value must be a blank-delimited list of jars
# relative to the DUCC_HOME/lib directory.
#ducc.local.jars =

# Declare the type of middleware providing the JMS service used by DUCC.
ducc.jms.provider=activemq

# +==================================================================================================+
# | Broker                                                                                           |
# +==================================================================================================+

# Declare the wire protocol used to communicate with ActiveMQ.
ducc.broker.protocol=tcp

# This declares the port on which the ActiveMQ broker is listening for messages.
ducc.broker.port=61617

# The broker *client* url decoration (ie - DUCC daemons).
ducc.broker.url.decoration=jms.useCompression=true&jms.prefetchPolicy.all=0

# The Broker's name must match the actual broker name in the broker config.
# This is the internal name of the broker, used to locate the Broker's MBean in the
# JMX Registry.  It is NOT related to any node name.  When using the ActiveMQ
# distribution supplied with DUCC it should always be set to localhost.
ducc.broker.name=localhost

# The Broker's jmx port.
# This is the port used to make JMX connections to the broker.  This should only
# be changed by administrators familiar with ActiveMQ configuration.
ducc.broker.jmx.port=1100

# If set to true, DUCC will start and stop the ActiveMQ broker as part of its normal
# start/stop scripting.
ducc.broker.automanage = true

# This is the ActiveMQ credentials file used to authenticate DUCC daemons with the
# broker.
ducc.broker.credentials.file=${ducc.private.resources}/ducc-broker-credentials.properties

# This sets the heap size for the broker.
ducc.broker.memory.options = -Xmx1G

# This is the ActiveMQ configuration file to use.  The path
# must be specified relative to the ActiveMQ installation directory.
ducc.broker.configuration = conf/activemq-ducc.xml

# This names the location where ActiveMQ is installed.
ducc.broker.home = ${DUCC_HOME}/apache-uima/apache-activemq

# The broker "server" URL decoration:
#   transport.soWriteTimeout=45000 = 45 second timeout on write operations to the socket.
ducc.broker.server.url.decoration = transport.soWriteTimeout=45000
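# Illustrative note (not a property read by DUCC): with the settings above and, for example,
# ducc.head = myhost, the client-side broker URL used by the DUCC daemons takes roughly
# this form:
#
#   tcp://myhost:61617?jms.useCompression=true&jms.prefetchPolicy.all=0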
# +==================================================================================================+
# | Transport                                                                                        |
# +==================================================================================================+

# To enable tracing of RM messages arriving in OR and NodeMetrics arriving in WS.
#ducc.transport.trace = orchestrator:RmStateDuccEvent webserver:NodeMetricsUpdateDuccEvent

# +==================================================================================================+
# | Authentication                                                                                   |
# +==================================================================================================+

# This specifies the class used for Web Server session authentication.
# If unconfigured, the Web Server enforces no authentication.
#ducc.authentication.implementer=org.apache.uima.ducc.ws.authentication.LinuxAuthenticationManager
#ducc.authentication.implementer=org.apache.uima.ducc.ws.authentication.SecureFileAuthenticator
#ducc.authentication.implementer=org.apache.uima.ducc.ws.authentication.GSAAuthenticator

# Specify users allowed to log in to the web server.
#ducc.authentication.users.include = user1 user2

# Specify users not allowed to login (default is all users can login; if a user is in
# both include and exclude lists, then exclude is enforced)
#ducc.authentication.users.exclude = user1 user3

# Specify groups allowed to login (default is all groups can login)
#ducc.authentication.groups.include = groupA groupB

# Specify groups not allowed to login (default is all groups can login; if a group is in
# both include and exclude lists, then exclude is enforced)
#ducc.authentication.groups.exclude = groupA groupC

# +==================================================================================================+
# | Language                                                                                         |
# +==================================================================================================+

# Establish the language for national language support of messages.
# Currently only "en" is supported.
ducc.locale.language=en

# Establish the country for national language support of messages.
# Currently only "us" is supported.
ducc.locale.country=us

# +==================================================================================================+
# | Daemon Administration                                                                            |
# +==================================================================================================+

# This is the JMS endpoint name used for DUCC administration messages.
ducc.admin.endpoint=ducc.admin.channel

# This is the JMS message type used for DUCC administration messages.
# Only "topic" is supported.
ducc.admin.endpoint.type=topic

# JMX port number for DUCC processes.  Each DUCC process will attempt
# to use this port for its JMX Connector.  If the port is not available
# port+1 will be used until an available port is found.
# Every process started by DUCC has JMX enabled by default.
# The DUCC Web Server's System.Daemons page is used to find the JMX URL that gets assigned to
# each of the DUCC management processes.  The Web Server's Job.Details page for each job is
# used to find the JMX URL that is assigned to each JP.
ducc.jmx.port=2099
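# Illustrative note (not a property read by DUCC): with ducc.jmx.port=2099, a DUCC process
# first tries to bind its JMX Connector to 2099; if that port is already in use it tries
# 2100, then 2101, and so on until a free port is found.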
ducc.agent.jvm.args        = -Xmx500M
ducc.orchestrator.jvm.args = -Xmx1G
ducc.rm.jvm.args           = -Xmx1G
ducc.pm.jvm.args           = -Xmx1G
ducc.sm.jvm.args           = -Xmx1G

# use the following flag under IBM Java 8 to allow Chromium to visit
# w/o getting ERR_SSL_VERSION_OR_CIPHER_MISMATCH
#   -Dcom.ibm.jsse2.overrideDefaultTLS=true
# see https://issues.apache.org/jira/browse/UIMA-5475
ducc.ws.jvm.args = -Xmx2G -Djava.util.Arrays.useLegacyMergeSort=true -Dcom.ibm.jsse2.overrideDefaultTLS=true

# +==================================================================================================+
# | Node Administration                                                                              |
# +==================================================================================================+

# Specify a minimum amount of free swap space available on a node.
# If an agent detects free swap space dipping below the value defined
# below, it will find the fattest (in terms of memory) process in its
# inventory and kill it.  The value of the parameter below is expressed
# in bytes.
# Initially disabled by setting the threshold at 0.
ducc.node.min.swap.threshold=0

# +==================================================================================================+
# | Job Administration                                                                               |
# +==================================================================================================+

# Max number of work-item CASes for each job (default is "unlimited")
# Note: formerly known as ducc.threads.limit
# This enforces a maximum number of pipelines per job, over all its processes.  No
# job will have more active work-items than this dispatched.
# The value is related to the size of the Job Driver heap and the real memory consumed by JD.
# If the JD is consuming too much memory, try reducing this value.
ducc.job.max.pipelines.count = 5000

# +==================================================================================================+
# | CLI Administration                                                                               |
# +==================================================================================================+

# These environment values are included on job/service/AP submissions
ducc.environment.propagated = USER HOME LANG DUCC_SERVICE_INSTANCE

# No timeout on CLI requests
ducc.cli.httpclient.sotimeout=0

#------------------------------------------------------------------------------
# When set, the CLI signs each request so the Orchestrator can be sure the
# requestor is actually who he claims to be.
#   off, // CLI submit and cancel signature enforcement disabled
#   on,  // CLI submit and cancel signature enforcement enabled (default)
ducc.signature.required=on
#------------------------------------------------------------------------------

# +==================================================================================================+
# | Web Server                                                                                       |
# +==================================================================================================+

# The name of the pluggable java class used to implement the Web Server.
ducc.ws.configuration.class=org.apache.uima.ducc.ws.config.WebServerConfiguration

# This endpoint is used for Web Server self test to determine viability of broker.
# After 3 missed messages to self via broker, Web Server considers broker to be down.
ducc.ws.state.update.endpoint=ducc.ws.state

# This is the JMS endpoint type used for the state messages sent by the Web Server.
ducc.ws.state.update.endpoint.type=topic

# The interval in milliseconds between Web Server publications of its state.
ducc.ws.state.publish.rate=5000

# The elapsed time in milliseconds between monitored head-node daemons' publications
# that if exceeded indicates "down".  Default = 120000 (two minutes).
ducc.ws.monitored.daemon.down.millis.expiry=120000

# Optionally configure the Web Server to run on a non-head node
# This is the name of the node the web server is started on.  If not specified,
# the web server is started on ${ducc.head}.
# ducc.ws.node = my.node.com

# Optionally configure the Web Server IP address
# In multi-homed systems it may be necessary to specify to which of the multiple addresses
# the Web Server listens for requests.  This property is an IP address that specifies to which
# address the Web Server listens.
# ducc.ws.ipaddress =

# Optionally configure the Web Server IP port for HTTP requests, default is 42133
ducc.ws.port = 42133

# Optionally configure the Web Server IP port for HTTPS requests, default is 42155
ducc.ws.port.ssl = 42155

# Optionally configure the Web Server welcome page, default is index.html (which forwards to jobs.html)
ducc.ws.welcome.page = index.html

# Optionally configure the Web Server DUCC_HOME display value, default is the absolute path of DUCC_HOME.
ducc.ws.display.home =

# Optionally configure the Web Server job automatic cancel timeout, default is 10.  To disable the
# feature specify 0.  Employed when the user specifies the --wait_for_completion flag on job submission,
# in which case the job monitor program must visit
# http://<ws-node>:<ws-port>/ducc-servlet/proxy-job-status?id=<job-id> within this expiry time,
# else the job will be automatically canceled (unless the feature is disabled) by the Web Server
# acting as the administrator ducc (which must be specified in the ducc.administrators file).
ducc.ws.automatic.cancel.minutes = 5

# Optionally configure the Web Server max cached (and thus available for display)
# history entries for each of Jobs/Reservations/Services
ducc.ws.max.history.entries = 4096

# Specify login enabled (default is true)
ducc.ws.login.enabled = false

# For node visualization - if true, strip domain names from labels for cleaner visuals
ducc.ws.visualization.strip.domain = true

# Optionally configure the Web Server request log, default is 0 (meaning no request logging)
# Logs are written to DUCC_HOME/logs/webserver
ducc.ws.requestLog.RetainDays = 30

# Specify one of { unrestricted, encrypted, blocked } to control
# requests to the Web Server with responses containing user data.
# When "unrestricted", requests for user data via http or https are honored.
# When "encrypted", requests for user data only via https are honored.
# When "blocked", requests for user data are not honored.
ducc.ws.user.data.access = unrestricted
# Note: to employ "encrypted" use the following settings:
#   ducc.ws.port = 42133
#   ducc.ws.port.ssl = 42155
#   ducc.ws.login.enabled = true
#   ducc.ws.user.data.access = encrypted
# See documentation for further information.

# --------------------------------------------------------------
# name:    ducc.ws.banner.message
# purpose: display banner message on all main pages
# choices: default=none
# change:  effective immediately
#ducc.ws.banner.message = Do not adjust your set.  This is a message from your DUCC administrator.
# NOTE - Feature under development
# When set exposes the Experiments page and enables the ducc_jed_submit script
ducc.experiments = false

# +==================================================================================================+
# | Job Driver                                                                                       |
# +==================================================================================================+

# The name of the pluggable java class used to implement the Job Driver (JD).
ducc.jd.configuration.class=org.apache.uima.ducc.transport.configuration.jd.JobDriverConfiguration

# This is the JMS endpoint name used by the Job Driver to send state to the Orchestrator.
ducc.jd.state.update.endpoint=ducc.jd.state

# This is the JMS message type used to send state to the Orchestrator.
ducc.jd.state.update.endpoint.type=topic

# The interval in milliseconds between JD state publications to the Orchestrator.
# A higher rate (smaller number) may slightly increase system response but will
# increase network load.  A lower rate will somewhat decrease system response and
# lower network load.
ducc.jd.state.publish.rate=15000

# This is a human-readable string used to form queue names for the JMS queues used to pass
# CASs from the Job Driver to the Job Processes.  The complete queue name comprises the prefix
# concatenated with the DUCC-assigned Job number.
ducc.jd.queue.prefix=ducc.jd.queue.

# After dispatching a work item to the UIMA-AS client for processing, the number of minutes that the
# Job Driver will wait for two callbacks (queued and assigned) before considering the work item
# lost.  The elapsed time for the callbacks is normally sub-second.  Intermittent network problems
# may cause unusual spikes.  If not specified, the default value is 5 minutes.
ducc.jd.queue.timeout.minutes=5

# If not specified, the default value is 24 hrs (24*60 minutes)
# This property specifies the default value for the time, in minutes, that the JD should
# wait for a work-item to be processed.  If processing has not completed in this time the
# process is terminated and the job's error handler decides whether to retry the
# work-item or not.
ducc.default.process.per.item.time.max = 1440

# If not specified, the default max time in minutes allowed for AE initialization.
# This property specifies the default value for the time, in minutes, that the agent should
# wait for a JP to complete initialization.  If initialization is not completed in this time
# the process is terminated and an InitializationTimeout status is sent to the JD,
# which decides whether to retry the process or terminate the job.
ducc.default.process.init.time.max = 120

# The following 5 values comprise the specification used by the DUCC Orchestrator daemon to
# request an allocation from the DUCC Resource Manager for Job Driver use.  The values given
# below are the defaults.
ducc.jd.host.class=JobDriver
ducc.jd.host.description=Job Driver
ducc.jd.host.memory.size=2GB
ducc.jd.host.number.of.machines=1
ducc.jd.host.user=System

# For a newly started Job, the number of JP UIMA initialization failures
# allowed until at least one JP succeeds - otherwise, the Job self-destructs.
# Default is 1.
ducc.jd.startup.initialization.error.limit=1

# The next 4 values are related - each JD is assigned a piece of the Job Driver host memory which,
# along with the size of the CR's type system, limits the number of active work-item
# CASes in a job.  To avoid swapping the max heap size should also be restricted.
# Memory size in MB allocated for each JD (default 300)
# When CGroups are enabled, this is the RSS, in MB, that is reserved for each JD process,
# and enforced by the CGroup support.  Larger JDs are permitted, but the CGroup support will
# force the excess RSS onto swap.  This potentially slows the performance of that JD, but
# preserves the resources for other, better-behaved, JDs.
ducc.jd.share.quantum = 400

# The number of "slices" of size "jd.share.quantum" kept in reserve.
# The Orchestrator makes Reservation requests to RM to get Reservations
# (Job Driver hosts) each of which is then subdivided into "slices", one
# per JD.  This number specifies the number of unused "slices" that should
# be kept on-hand in anticipation of newly submitted jobs (default 2).
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# If the value specified is 0 then no JD allocation will take place
# and all submitted jobs will be rejected.
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
ducc.jd.share.quantum.reserve.count = 3
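# Illustrative arithmetic (not a property read by DUCC): with the defaults above, each 2GB
# Job Driver reservation (ducc.jd.host.memory.size) is subdivided into 400 MB slices
# (ducc.jd.share.quantum), i.e. roughly 5 JD slices per reservation, and the Orchestrator
# tries to keep 3 unused slices on hand (ducc.jd.share.quantum.reserve.count).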
# The maximum length of a work-item name returned by CAS.getDocumentText().
# Truncation is enforced beyond this limit.
# If not specified, there is no limit.
ducc.jd.workitem.name.maximum.length = 64

# Extra JVM args to be appended to any user-supplied "driver_jvm_args"
# Dynamic: changes to this property immediately affect newly submitted work
# Flag: -DUimaAsCasTracking will add to logs:
#       UIMA-AS client & service trigger internal tracing including RefId,
#       UIMA-AS client onBeforeMessageSendHandler callbacks including RefId, and
#       UIMA-AS client onBeforeProcessCAS callbacks including RefId
# Flag: -DUimaAsClientTracking will add to jd.out.log:
#       core pool size changes,
#       UIMA-AS client sendAndReceive invocations,
#       UIMA-AS client onBeforeMessageSendHandler callbacks, and
#       UIMA-AS client onBeforeProcessCAS callbacks
# Note: should contain an Xmx a little below the "ducc.jd.share.quantum" value;
#       the Xmx entry is ignored if the user has specified one
ducc.driver.jvm.args = -Xmx300M

# Max number of threads in the Jetty thread pool servicing incoming
# HTTP requests
# ducc.driver.jetty.max.threads = 100

# Max idle time for jetty threads (in millis).  When a thread exceeds
# its idle time it will be terminated.
# ducc.driver.jetty.thread.idletime = 60000

# Extra JVM args to be appended to any user-supplied "process_jvm_args"
# Dynamic: changes to this property immediately affect newly submitted work
# ducc.process.jvm.args=-XX:+HeapDumpOnOutOfMemoryError

# --------------------------------------------------------------
# name:    ducc.jd.communications.scheme
# purpose: specify communications scheme between JD and JPs
# choices: [http,https] default=https
# change:  effective immediately for new jobs
ducc.jd.communications.scheme=https
# --------------------------------------------------------------
# name:    ducc.jd.error.handler.class
# purpose: specify error handler
# default: org.apache.uima.ducc.ErrorHandler
# change:  effective immediately for new jobs
# details: The error handler class is employed when work items fail or time-out in order
#          to determine what to do next in three dimensions:
#          job kill or continue, process kill or continue, work item kill or continue.
ducc.jd.error.handler.class = org.apache.uima.ducc.ErrorHandler
# --------------------------------------------------------------
# name:    ducc.jd.error.handler.args
# purpose: configure error handler
# choices: max_job_errors=<n> max_timeout_retrys_per_workitem=<n>
# change:  effective immediately for new jobs
# details: The error handler args passed to the error handler class (see above).
#          These values, if any, are combined with any specified by the user.
#          Individual user-specified --driver_exception_handler_arguments
#          prevail.
ducc.jd.error.handler.args = max_job_errors=15 max_timeout_retrys_per_workitem=0

# +==================================================================================================+
# | Service Manager                                                                                  |
# +==================================================================================================+

# This is the name of the pluggable java class used to implement the Service Manager.
ducc.sm.configuration.class=org.apache.uima.ducc.sm.config.ServiceManagerConfiguration

# This is the JMS endpoint name used for API messages received by the Service Manager.
ducc.sm.api.endpoint=ducc.sm.api

# This is the JMS message type used for API messages received by the Service Manager.
ducc.sm.api.endpoint.type=queue

# This is the JMS endpoint name used for state messages sent by the Service Manager.
ducc.sm.state.update.endpoint=ducc.sm.state

# This is the JMS message type used for state messages sent by the Service Manager.
ducc.sm.state.update.endpoint.type=topic

# Default pinger
# This is the name of the default UIMA-AS ping/monitor class.  The default class issues
# get-meta to a service and uses JMX to fetch queue statistics for presentation in
# the Web Server.
# This name is either:
#   - The fully qualified name of the class to use as the default UIMA-AS pinger.  It may
#     be necessary to include the class or jar file in the classpath used to start the SM.
#     (The recommended way to do this is to add an entry to the ducc.local.jars property
#     in ducc.properties.)
#   - The name of a pinger registration file.  This is the recommended way to
#     provide installation-customized pingers.  In short, it resides in ducc.properties
#     and contains the full set of ping-related properties needed to run a pinger.
ducc.sm.default.monitor.class = org.apache.uima.ducc.sm.UimaAsPing

# This is the maximum number of consecutive failures of service instance initialization
# permitted before DUCC stops creating new instances.  When this cap is hit the SM
# will disable autostart for the service.  It may be overridden by the service
# registration's instance_failures_limit parameter.
ducc.sm.instance.failure.max 5

# backward compatibility
ducc.sm.instance.failure.limit ${ducc.sm.instance.failure.max}

# This specifies a window of time in minutes over which some number of service instance
# failures are tolerated.  If the maximum number of tolerated failures is
# exceeded within this time window the Service Manager ceases to restart
# instances automatically.  The maximum tolerated failures is defined in
# ducc.sm.instance.failure.max.
# This may be overridden by individual service pingers using the registration
# property instance_failures_window.
ducc.sm.instance.failure.window 30

# max consecutive instance init failures before we stop trying to start things
ducc.sm.init.failure.limit 1
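# Illustrative example only (hypothetical values): an individual service registration can
# override the site-wide failure limits above with its own registration properties,
# for example:
#
#   instance_failures_limit  = 10
#   instance_failures_window = 60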
# This is the time, in milliseconds, between pings by the Service Manager
# to each known, running service.
ducc.sm.meta.ping.rate = 60000

# This is the number of consecutive pings that may be missed before a
# service is considered unavailable.
ducc.sm.meta.ping.stability = 10

# This is the time in milliseconds the SM waits for a response to a ping.  If the service does
# not respond within this time the ping is accounted for as a "missed" ping.
ducc.sm.meta.ping.timeout = 15000

# This is the HTTP port used by the Service Manager to field requests from the CLI / API.
# *****
# NOTE: for backward CLI / API compatibility (2.2.0 and before) this port is the same as
# ducc.orchestrator.http.port.  This entry can otherwise be deleted post-2.2.0 as it is no
# longer used by the current CLI / API.
# *****
ducc.sm.http.port=${ducc.orchestrator.http.port}

# This is the node where the Service Manager runs.  It MUST be ${ducc.head}.
ducc.sm.http.node=${ducc.head}

# This is the length of time, in milliseconds, that the SM allows a service to remain alive after
# all jobs that reference it have exited.  If no new job referencing it enters the system before this
# time has expired, the SM stops the service.
ducc.sm.default.linger=300000

# +==================================================================================================+
# | Orchestrator                                                                                     |
# +==================================================================================================+

# This is the name of the pluggable java class used to implement the DUCC Orchestrator.
ducc.orchestrator.configuration.class=org.apache.uima.ducc.orchestrator.config.OrchestratorConfiguration

# This indicates the level of recovery to be taken on restarting a
# system.  There are two levels of startup:
#   [cold] All reservations are canceled, all currently running
#          jobs (if any) are terminated.  All services are terminated.  The
#          system starts with no jobs, reservations, or services active.
#   [warm] All active work is continued.
#------------------------------------------------------------------------------
#   cold, // Recover: All active are forced to Completed           JD host: employ new
#   warm, // Recover: All Jobs+Services+Reservations (default)     JD host: employ current
ducc.orchestrator.start.type=warm
#------------------------------------------------------------------------------

# This is the name of the JMS endpoint through which the Orchestrator broadcasts its
# state messages.  These messages include full job information and can be relatively
# large, though they are now compressed.
ducc.orchestrator.state.update.endpoint=ducc.orchestrator.state

# This is the JMS endpoint type used for the state messages sent by the Orchestrator.
ducc.orchestrator.state.update.endpoint.type=topic

# The interval in milliseconds between Orchestrator publications of its state.
ducc.orchestrator.state.publish.rate=10000

#------------------------------------------------------------------------------
# How long between maintenance cycles (in milliseconds)
# Each maintenance cycle the orchestrator removes obsolete JD queues (MqReaper)
# and performs health related activities (HealthMonitor) including: capping JPs
# for Jobs that have too many initialization failures and terminating Jobs whose
# JDs have failed.
ducc.orchestrator.maintenance.rate=60000
#------------------------------------------------------------------------------

# This is the HTTP port used by the Orchestrator to field requests from the CLI / API.
ducc.orchestrator.http.port=19988

# Node where the Orchestrator is running.  It MUST be ${ducc.head}.
ducc.orchestrator.http.node=${ducc.head}

#------------------------------------------------------------------------------
# Specify if reserving an entire machine is allowed
# Allowing could be a bad idea when a cluster has very few, very large machines.
ducc.orchestrator.unmanaged.reservations.accepted=true
#------------------------------------------------------------------------------

# This is the name of the JMS endpoint through which the daemons broadcast state changes
ducc.daemons.state.change.endpoint=ducc.daemons.state.change

# This is the JMS endpoint type used for the state change messages sent by the daemons.
ducc.daemons.state.change.endpoint.type=queue

# +==================================================================================================+
# | Resource Manager (aka Scheduler)                                                                 |
# +==================================================================================================+

# This is the name of the pluggable java class used to implement the DUCC Resource
# Manager.
ducc.rm.configuration.class=org.apache.uima.ducc.rm.config.ResourceManagerConfiguration

# This is the name of the JMS endpoint through which the Resource Manager broadcasts its
# state.
ducc.rm.state.update.endpoint=ducc.rm.state

# This is the JMS endpoint type used for state messages sent by the Resource Manager.
ducc.rm.state.update.endpoint.type=topic

# This specifies the frequency of RM schedules, relative to the number of Orchestrator publications.
# If the value is set to 1, RM runs and publishes a schedule immediately on receipt of OR state.
# If set to some number N, RM runs a schedule after receipt of every N Orchestrator publications.
ducc.rm.state.publish.ratio = 1

# This specifies the maximum non-preemptable shares any user may be awarded, in GB.  If not configured,
# there is no maximum enforced.  This can be overridden on a per-user basis in the user registry.
#ducc.rm.global_allotment = 360

# The share quantum is the smallest amount of RAM that is schedulable for jobs, in GB.
# Jobs are scheduled based entirely on their memory requirements.  Memory is allocated in
# multiples of the share quantum.
ducc.rm.share.quantum = 1

# The component that implements the scheduling algorithm is pluggable.  This specifies the
# name of that class.
ducc.rm.scheduler = org.apache.uima.ducc.rm.scheduler.NodepoolScheduler

# File defining the scheduler classes - found in DUCC_HOME/resources
ducc.rm.class.definitions = ducc.classes

# File defining the user registry.  Optional, need not exist.  Found in DUCC_HOME/resources.
# The registry is used only to override the global allotments.  The registry entries may also
# be placed in the ducc.classes file if desired.
ducc.rm.user.registry = ducc.users

# default memory, in GB, if not specified
ducc.rm.default.memory = 4

# Number of node metrics heartbeats to wait for before the RM starts up.
# During DUCC initialization the Resource Manager must wait some period of time for
# all the nodes in the cluster to check in via their "heartbeats".  If the RM were to start
# scheduling too soon there would be a period of significant "churn" as the perceived cluster
# configuration changes rapidly.  As well, it would be impossible to recover work in a warm
# or hot start if the affected nodes had not yet checked in.
# The init.stability property indicates how many heartbeat intervals the RM must wait before
# it starts scheduling after initialization.
ducc.rm.init.stability = 2
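# Illustrative arithmetic (not a property read by DUCC): with ducc.rm.init.stability = 2 and
# the default 30-second agent heartbeat interval (ducc.agent.node.metrics.publish.rate=30000),
# the RM waits roughly 60 seconds after startup before producing its first schedule.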
# Number of missed node metrics updates to consider a node down.
# The RM receives regular "heartbeats" from the DUCC agents in order to know what
# nodes are available for scheduling.  The node.stability property configures the number of
# consecutive heartbeats that may be missed before the Resource Manager considers the
# node to be inoperative.
# If a node becomes inoperative, the Resource Manager deallocates all processes on that
# node and attempts to reallocate them on other nodes.  The node is marked offline and is
# unusable until its heartbeats start up again.
# The default configuration declares the agent heartbeats to occur at 30 second intervals.
# (see ducc.agent.node.metrics.publish.rate)
# Therefore heartbeats must be missed for 3 minutes before the Resource Manager takes
# corrective action.
ducc.rm.node.stability = 6

# Which policy to use when shrinking/evicting shares - alternatively, SHRINK_BY_MACHINE.
# The eviction.policy is a heuristic to choose which processes of a job to preempt because of
# competition from other jobs.
# The SHRINK_BY_INVESTMENT (default) policy attempts to preempt processes such that the
# least amount of work is lost.  It chooses candidates for eviction in order of:
#   - Processes still initializing, with the smallest time spent in the initializing step.
#   - Processes whose currently active work items have been executing for the shortest time.
# The SHRINK_BY_MACHINE policy attempts to preempt processes so as to minimize
# fragmentation on machines with large memories that can contain multiple job processes.
# No consideration of execution time or initialization time is made.
ducc.rm.eviction.policy = SHRINK_BY_INVESTMENT

# Max nodes to initially allocate until init is complete.
# The type of jobs supported by DUCC generally have very long and often fragile
# initialization periods.  Errors in the applications and other problems such as missing or
# errant services can cause processes to fail during this phase.
# To avoid preempting running jobs and allocating a large number of resources to jobs only
# to have them fail during initialization, the Resource Manager schedules a small number of
# processes until it is determined that the initialization phase will succeed.
# The initialization.cap determines the maximum number of processes allocated to a job
# until at least one process successfully initializes.  Once any process initializes the Resource
# Manager will proceed to allocate the job its full fair share of processes.
# The initialization cap can be overridden on a class basis by configuration via ducc.classes.
ducc.rm.initialization.cap = 1

# When true, jobs expand not all at once after init, but a bit slower, doubling each epoch
# until the max fair-share is reached.  If false, jobs increase immediately to their fair share,
# at the cost of mass evictions.
# Expand.by.doubling can be overridden on a class basis by configuration via ducc.classes.
ducc.rm.expand.by.doubling = true
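# Illustrative example (not a property read by DUCC): with expand.by.doubling enabled and the
# initialization cap of 1 above, a job whose fair share works out to 16 processes is typically
# grown 1 -> 2 -> 4 -> 8 -> 16 over successive scheduling epochs once one process has
# initialized, rather than being given all 16 processes at once.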
# Predict when a job will end and avoid expanding if not needed.
# Because initialization time may be very long, it may be the case that a job that might be
# eligible for expansion will be able to complete in the currently assigned shares before any
# new processes are able to complete their initialization.  In this case expansion results in
# a waste of resources and potential eviction of processes that need not be evicted.
# The Resource Manager monitors the rate of task completion and attempts to predict the
# maximum number of processes that will be needed at a time in the future based on the
# known process initialization time.  If it is determined that expansion is unnecessary then it
# is not done for the job.
# Prediction can be overridden on a class basis by configuration via ducc.classes.
ducc.rm.prediction = true

# Add this fudge factor (milliseconds) to the expansion target when using prediction.
# When ducc.rm.prediction is enabled, the known initialization time of a job's processes plus
# some "fudge" factor is used to predict the number of future resources needed.  The "fudge"
# is specified in milliseconds.
# The default "fudge" is very conservative.  Experience and site policy should be used to set a
# more practical number.
# Prediction.fudge can be overridden on a class basis by configuration via ducc.classes.
ducc.rm.prediction.fudge = 120000

# What is the minimum number of processes for a job before we do defrag?  If a job has fewer
# than this many processes the RM may attempt defragmentation to bring the processes up to this value.
# If enabled, limited defragmentation of resources is performed by the Resource Manager to
# create sufficient space to schedule work that has insufficient resources (new jobs, for
# example).  The term "insufficient" is defined as "needing more processes than the
# defragmentation threshold, but currently having fewer processes than the defragmentation
# threshold."  These are called "needy" jobs.  Additionally, the Resource Manager
# will never evict processes from "needy" jobs for the purpose of defragmentation.
# This property allows installations to customize the value used to determine if a
# job is "needy".  Jobs with fewer processes than this are potentially needy, and
# jobs with more processes are never needy.
ducc.rm.fragmentation.threshold = 8

# This is the JMS endpoint used for RM administrative requests.
ducc.rm.admin.endpoint = ducc.rm.admin.channel

# This is the JMS endpoint type used for RM administrative requests.
ducc.rm.admin.endpoint.type = queue

# This is the JMS endpoint used for RM administrative requests.
ducc.rm.via.or.admin.endpoint = ducc.rm.via.or.admin.channel

# This is the JMS endpoint type used for RM administrative requests.
ducc.rm.via.or.admin.endpoint.type = queue

# Unmanaged Reservations are usually assigned to machines whose size exactly matches the
# requested size, after rounding up to a multiple of the node pool's quantum.
# When this property is positive, machines up to this many GB larger than the rounded-up
# size will be considered.
ducc.rm.reserve_overage = 0
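# Illustrative example (not a property read by DUCC): with a 1GB share quantum, a reservation
# request for 27GB rounds up to 27GB; if ducc.rm.reserve_overage were set to 4, machines of
# 27GB up to 31GB could satisfy the request, whereas with the default of 0 only an exact-size
# machine is considered.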
# +==================================================================================================+
# | Agents                                                                                           |
# +==================================================================================================+

# This is the name of the pluggable java class used to implement the DUCC Agents.
ducc.agent.configuration.class=org.apache.uima.ducc.agent.config.AgentConfiguration

# This is the JMS endpoint through which agents receive state from the Process Manager.
ducc.agent.request.endpoint=ducc.agent

# This is the JMS endpoint type used for state messages sent by the Process Manager.
ducc.agent.request.endpoint.type=topic

# This is the JMS endpoint used to communicate
# from the managed process (Job Process) to the Agent.
ducc.agent.managed.process.state.update.endpoint=ducc.managed.process.state.update

# This is the JMS endpoint type used to communicate
# from the managed process (Job Process) to the Agent.
ducc.agent.managed.process.state.update.endpoint.type=socket

# Endpoint parameters which are transport specific.
# For socket transport the params are:
#   - transferExchange=true - include the Camel Exchange wrapper in a message
#   - sync=false            - use socket transport for one-way messaging (no replies needed)
ducc.agent.managed.process.state.update.endpoint.params=transferExchange=true&sync=false

# ***** Note: the Resource Manager uses the data in the node metrics for scheduling.
# The interval in milliseconds between node metric publications.
# Every agent publishes its updates at this rate.  On large clusters, a high rate (small
# interval) can be a burden on the network.
ducc.agent.node.metrics.publish.rate=30000

# This is the JMS endpoint used to send node metrics updates to listeners.  Listeners
# are usually the Resource Manager and Web Server.  These messages serve as node
# "heartbeats".  As well, the node metrics heartbeats contain the amount of RAM on the node
# and the number of processors.
ducc.agent.node.metrics.endpoint=ducc.node.metrics

# This is the JMS endpoint type used to send node metrics updates from the agents.
ducc.agent.node.metrics.endpoint.type=topic

# The interval in milliseconds between node inventory publications.
# If the inventory has not changed since the last update the agent bypasses sending the
# update, up to a maximum of ducc.agent.node.inventory.publish.rate.skip times.
ducc.agent.node.inventory.publish.rate=10000

# This is the number of times the agent will bypass publishing its node inventory if the
# inventory has not changed.
ducc.agent.node.inventory.publish.rate.skip=3

# This is the JMS endpoint used to send node inventory messages to listeners.  Listeners are
# usually the Orchestrator and Web Server.  Information in these messages includes a map of
# processes being managed on the node.
ducc.agent.node.inventory.endpoint=ducc.node.inventory

# This is the JMS endpoint type used to send node inventory updates from the agents.
ducc.agent.node.inventory.endpoint.type=topic

# ducc.agent.launcher.thread.pool.size - Deprecated
#ducc.agent.launcher.thread.pool.size=10

# enable/disable use of ducc_ling
# ducc.agent.launcher.use.ducc_spawn: true, use ducc_ling.  Default: false
ducc.agent.launcher.use.ducc_spawn=true

# Specify the location of ducc_ling in the filesystem.
# This property specifies the full path to the ducc_ling utility.  During installation ducc_ling
# is normally moved to local disk and given setuid-root privileges.  Use this property to tell
# the DUCC agents the location of the installed ducc_ling.  The default location is within
# an architecture-dependent subdirectory of DUCC_HOME/admin.
# The architecture is derived from the JRE property os.arch.  During DUCC installation
# the ducc_ling utility is compiled for the architecture of the host where DUCC is installed.
# In heterogeneous clusters, the system administrator should run the utility build_duccling
# once on a machine of each architecture to ensure this utility gets correctly installed.
ducc.agent.launcher.ducc_spawn_path=${DUCC_HOME}/admin/${os.arch}/ducc_ling
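# Illustrative note (not a property read by DUCC): on a typical 64-bit x86 Linux node the JRE
# reports os.arch as amd64, so the default path above resolves to something like
# ${DUCC_HOME}/admin/amd64/ducc_ling.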
# Max amount of time (in millis) the agent allows the process to stop before issuing kill -9.
# This property specifies the time, in milliseconds, the agent should wait before forcibly
# terminating a job process (JP) after an attempted graceful shutdown.  If the child process
# does not terminate in the specified time, it is forcibly terminated with kill -9.
# This type of stop can occur because of preemption or system shutdown.
ducc.agent.launcher.process.stop.timeout=60000

# Max time in millis allowed for AE initialization.  Default 2 hours 7200000. - Deprecated
#ducc.agent.launcher.process.init.timeout=7200000

# Exclude the following user ids while detecting rogue processes
# The DUCC Agents scan nodes for processes that should not be running; for example,
# a job may have left a 'rogue' process alive when it exits, or a user may log in to a node
# unexpectedly.  These processes are reported to the administrators via the webserver for
# possible action.
# This configuration parameter enumerates userids which are ignored by the rogue-process scan.
# default = root,postfix,ntp,nobody,daemon,100
ducc.agent.rogue.process.user.exclusion.filter=

# Max UID reserved by the OS.  This is used to detect rogue processes and to report
# available memory on a node.
# The ducc.agent.node.metrics.sys.uid.max property is dropped in favor of
# ducc.agent.rogue.process.sys.uid.max, which seems more descriptive
#
# ducc.agent.node.metrics.sys.uid.max=500
ducc.agent.rogue.process.sys.uid.max=500

# Exclude the following processes while detecting rogue processes
# The DUCC Agents scan nodes for processes that should not be running; for example,
# a job may have left a 'rogue' process alive when it exits, or a user may log in to a node
# unexpectedly.  These processes are reported to the administrators via the webserver for
# possible action.
# This configuration parameter enumerates processes by name which are ignored by the
# rogue process detector.
# default = sshd:,-bash,-sh,/bin/sh,/bin/bash,grep,ps
ducc.agent.rogue.process.exclusion.filter=sshd:,-bash,-sh,/bin/sh,/bin/bash,grep,ps

# Fudge factor (as a percentage) that the agent uses to multiply a share size when
# determining if a JP exceeds its allotted memory, which is calculated as follows:
# (fudge factor/100)*share size + share size.  If the JP's RSS exceeds this number, the agent
# kills the process.
# The DUCC agent monitors the size of the resident memory of its spawned processes.  If a
# process exceeds its declared memory size by any significant amount it is terminated and
# a ShareSizeExceeded message is sent.  The Job Driver counts this towards the maximum
# errors for the job and will eventually terminate the job if excessive such errors occur.
# This property defines the percentage over the declared memory size that a process is
# allowed to grow to before being terminated.
# To disable this feature, set the value to -1.
ducc.agent.share.size.fudge.factor=5
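# Illustrative arithmetic (not a property read by DUCC): with a declared share size of
# 4GB (4096MB) and the fudge factor of 5 above, a JP may grow to roughly
# 4096 + (5/100)*4096 = 4300MB of RSS before the agent terminates it with ShareSizeExceeded.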
# Enable/Disable CGroup support.
# If CGroups are not installed on a specific machine, this is ignored.
# With CGroups the RSS for a managed process (plus any children processes it may spawn) is
# limited to the allocated share size.  Additional memory use goes to swap space.  DUCC
# monitors and limits swap use to the same proportion of total swap space as allocated
# share size is to total RAM.  If a process exceeds its allowed swap space it is terminated
# and a ShareSizeExceeded message is sent to the Job Driver.
# Nodes not using CGroups fall back to the ducc.agent.share.size.fudge.factor.
ducc.agent.launcher.cgroups.enable=true

# Define the location of cgroups utils like cgexec.  For mixed clusters with different
# flavors of unix, the utilities may be in different places.  In such a case, add a
# comma-separated list of paths like this:
#   ducc.agent.launcher.cgroups.utils.dir=/usr/bin,<another-path>,...
ducc.agent.launcher.cgroups.utils.dir=/usr/bin,/bin

# Set cgroup memory.swappiness
ducc.agent.launcher.cgroups.swappiness=10

# Number of retries to use when cgcreate fails
ducc.agent.launcher.cgroups.max.retry.count=1

# Amount by which to increase the delay (in msecs) between each retry
# Defines a delay factor in millis an agent will use to determine how long it
# should sleep between cgroup create retries.  Each time cgcreate fails
# an agent will increment the delay by adding the delay factor to the previous
# delay.  If the delay factor is 2000, on the first cgcreate failure an agent will
# wait 2000 ms, on the second failure the wait time will be 4000, on the third 6000, and
# so on.
ducc.agent.launcher.cgroups.retry.delay.factor=2000

# Exclusion file to enable node-based exclusion for cgroups and APs
# syntax:  <node>=cgroups,ap
# the above will exclude node <node> from using cgroups and/or prevent deployment of APs
ducc.agent.exclusion.file=${DUCC_HOME}/resources/exclusion.nodes

# Define the script which will collect the total swap used by a given process.  This
# script is launched by an agent via ducc_ling and runs as the owner
# of the process.
ducc.agent.swap.usage.script=${DUCC_HOME}/admin/ducc_get_process_swap_usage.sh

# Uncomment the following line to support auto reaping of rogue processes by DUCC's Agent
# ducc.agent.rogue.process.reaper.script=${DUCC_HOME}/admin/ducc_reap_rogues.sh

# +==================================================================================================+
# | Process Manager                                                                                  |
# +==================================================================================================+

# This is the name of the pluggable java class used to implement the DUCC Process Manager.
ducc.pm.configuration.class=org.apache.uima.ducc.pm.config.ProcessManagerConfiguration

# This is the endpoint through which the process manager receives state from the Orchestrator.
ducc.pm.request.endpoint=ducc.pm

# This is the JMS endpoint type used for state messages sent by the Orchestrator.
ducc.pm.request.endpoint.type=queue

# This is the endpoint through which the process manager sends its heartbeat.  The main receiver
# is the Web Server for its daemon status page.
ducc.pm.state.update.endpoint=ducc.pm.state

# This is the JMS endpoint type used for process manager heartbeats.  The primary receiver
# is the Web Server for its daemon status page.
ducc.pm.state.update.endpoint.type=topic

# The interval in milliseconds between process manager heartbeat publications.
ducc.pm.state.publish.rate=15000

# +==================================================================================================+
# | Job Process                                                                                      |
# +==================================================================================================+

# This is the name of the pluggable java class that implements
# the shell for Agent-launched user processes.
ducc.uima-as.configuration.class=org.apache.uima.ducc.transport.configuration.jp.JobProcessConfiguration
ducc.job-process.configuration.class=org.apache.uima.ducc.transport.configuration.jp.JobProcessConfiguration
ducc.service.configuration.class=org.apache.uima.ducc.transport.configuration.service.ServiceConfiguration

# This is the endpoint through which job processes (JPs) receive messages from the Agents.
ducc.uima-as.endpoint=ducc.job.managed.service

# This is the JMS endpoint type used for messages sent to the JPs from the Agents.
ducc.uima-as.endpoint.type=socket

# Endpoint parameters which are transport specific.
# For socket transport the params are:
#   - transferExchange=true - include the Camel Exchange wrapper in a message
#   - sync=false            - use socket transport for one-way messaging (no replies needed)
ducc.uima-as.endpoint.params=transferExchange=true&sync=false

# saxon8.jar - Saxon is an XSLT and XQuery processor.  It is used by UIMA-AS
# when deploying a service.
ducc.uima-as.saxon.jar.path=file:${DUCC_HOME}/apache-uima/saxon/saxon8.jar

# dd2spring.xsl - contains a set of rules used when transforming a
# UIMA-AS deployment descriptor into the SpringFramework
# context file used to deploy a UIMA-AS based service.
ducc.uima-as.dd2spring.xsl.path=${DUCC_HOME}/apache-uima/bin/dd2spring.xsl

# Maximum amount of time to wait for a response from the JD.  This value
# is used by the JP when sending requests to the JD.
ducc.process.request.timeout=30000

# Define the process container class for DD jobs to instantiate and invoke via reflection.
# The container provides classpath isolation for user-defined analytics.
# The container is instantiated with classes from a System classloader.
ducc.process.uima.as.container.class = org.apache.uima.ducc.user.jp.UimaASProcessContainer

# Define the process container class for non-DD jobs to instantiate and invoke via reflection.
# The container provides classpath isolation for user-defined analytics.
# The container is instantiated with classes from a System classloader.
ducc.process.uima.container.class = org.apache.uima.ducc.user.jp.UimaProcessContainer

# Define the sleep time (millis) for the JP to use when the JD sends an empty CAS.  In this case the
# JD's CR has processed its collection.  The JP threads need to slow down sending
# requests.
ducc.process.thread.sleep.time=60000

# Custom Flow Controller to use for DUCC Job Processes that don't provide a process_DD descriptor
ducc.flow-controller.specifier=org.apache.uima.ducc.FlowController

# +==================================================================================================+
# | Database                                                                                         |
# +==================================================================================================+

# Name of the database host(s) - initially disabled until the database is created.
# Overridden in site.ducc.properties by ducc_post_install or db_create.
# This is the name of the host(s) where the database is run.  It usually defaults to the
# same host as the ducc.head.  Those knowledgeable of the database can install the
# database elsewhere.  Use this parameter to specify that location(s).
# To disable use of the database, set this parameter to the string --disabled--
ducc.database.host.list = --disabled--

# Database JMX host.  Default is to allow only local JMX access.  Specify the real name for remote access.
ducc.database.jmx.host = localhost

# Database JMX port.
ducc.database.jmx.port = 7199

# If set to true, DUCC will start and stop the Cassandra database as part of its normal
# start/stop scripting.
ducc.database.automanage = true

# Path to the Cassandra database home
ducc.database.home = ${DUCC_HOME}/cassandra-server

#-----------------------------------
# DATABASE MEMORY TUNING
# WARNING: if you're not sure what these do you should probably not change them, as incorrect
# values can adversely affect the database, the database node, or both.
# Given that, if you're running on a mostly small system, e.g. 8GB or less, you probably
# should set ducc.database.mem.heap to 1 or 2 GB and ducc.database.mem.new to 100M.
#
# By default we allow cassandra to calculate these for us.
#
# IMPORTANT: if you set one of these you MUST set both, or cassandra will refuse to start.
#-----------------------------------

# Database xmx.  If not set, cassandra will calculate it for you.
# This is the value used to set Xmx and Xms when the database starts.  The
# Cassandra database makes an attempt to determine the best value of this.  The
# default is one-half of real memory, up to a maximum of 8G.  It is recommended that
# the default be used.  However, small installations may reduce this to as little
# as 512M.  Note that both Xmx and Xms are set.
#ducc.database.mem.heap = 4G

# Database setting for the 'young' generation.  If not set, cassandra will calculate it for you.
#ducc.database.mem.new = 100M
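# Illustrative example only: a small installation that chooses to tune the database memory
# could set both properties (they must be set together) in site.ducc.properties, for example:
#
#   ducc.database.mem.heap = 2G
#   ducc.database.mem.new  = 100M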
#-----------------------------------
# END OF DATABASE MEMORY TUNING
#-----------------------------------

# This class implements the persistence interface for the services registry
#ducc.service.persistence.impl = org.apache.uima.ducc.database.StateServicesDb
ducc.service.persistence.impl = org.apache.uima.ducc.common.persistence.services.StateServices

# This class implements the persistence interface for Job, Reservation, Service, and AP instances, plus
# the Orchestrator checkpoint.
#ducc.job.history.impl = org.apache.uima.ducc.database.HistoryManagerDb
ducc.job.history.impl = org.apache.uima.ducc.transport.event.common.history.HistoryPersistenceManager

#ducc.rm.persistence.impl = org.apache.uima.ducc.database.RmStatePersistence
ducc.rm.persistence.impl = org.apache.uima.ducc.common.persistence.rm.NullRmStatePersistence

# end database things -------------------------------------------------------------------------------