# Cassandra storage config YAML 
# See http://wiki.apache.org/cassandra/StorageConfiguration for
# explanations of configuration directives.

# name of the cluster
cluster_name: 'Test Cluster'

# Set to true to make new [non-seed] nodes automatically migrate data
# to themselves from the pre-existing nodes in the cluster.  Defaults
# to false because you can only bootstrap N machines at a time from
# an existing cluster of N, so if you are bringing up a cluster of
# 10 machines with 3 seeds you would have to do it in stages.  Leaving
# this off for the initial start simplifies that.
auto_bootstrap: false

# See http://wiki.apache.org/cassandra/HintedHandoff
hinted_handoff_enabled: true

# authentication backend, implementing IAuthenticator; used to limit keyspace access
authenticator: org.apache.cassandra.auth.AllowAllAuthenticator

# any IPartitioner may be used, including your own as long as it is on
# the classpath.  Out of the box, Cassandra provides
# org.apache.cassandra.dht.RandomPartitioner
# org.apache.cassandra.dht.OrderPreservingPartitioner, and
# org.apache.cassandra.dht.CollatingOrderPreservingPartitioner.
partitioner: org.apache.cassandra.dht.RandomPartitioner

# directories where Cassandra should store data on disk.
data_file_directories:
    - /var/lib/cassandra/data

# Addresses of hosts that are deemed contact points. 
# Cassandra nodes use this list of hosts to find each other and learn
# the topology of the ring.  You must change this if you are running
# multiple nodes!
seeds:
    - 127.0.0.1

# Access mode.  mmapped i/o is substantially faster, but only practical on
# a 64bit machine (which notably does not include EC2 "small" instances)
# or relatively small datasets.  "auto", the safe choice, will enable
# mmapping on a 64bit JVM.  Other values are "mmap", "mmap_index_only"
# (which may allow you to get part of the benefits of mmap on a 32bit
# machine by mmapping only index files) and "standard".
# (The buffer size settings that follow only apply to standard,
# non-mmapped i/o.)
disk_access_mode: auto

# Unlike most systems, in Cassandra writes are faster than reads, so
# you can afford more of those in parallel.  A good rule of thumb is 2
# concurrent reads per processor core.  Increase concurrent_writes to
# the number of clients writing at once if you enable CommitLogSync +
# CommitLogSyncDelay (pre-YAML option names; see the commitlog_sync
# settings below -- TODO confirm the exact mapping).
concurrent_reads: 8
concurrent_writes: 32

# This sets the number of memtable flush writer threads.  These will
# be blocked by disk io, and each one will hold a memtable in memory
# while blocked. If you have a large heap and many data directories,
# you can increase this value for better flush performance.
# By default this will be set to the number of data directories defined.
#memtable_flush_writers: 1

# Buffer size to use when performing contiguous column slices. 
# Increase this to the size of the column slices you typically perform
sliced_buffer_size_in_kb: 64

# TCP port, for commands and data
storage_port: 7000

# Address to bind to and tell other nodes to connect to. You _must_
# change this if you want multiple nodes to be able to communicate!
listen_address: localhost

# The address to bind the Thrift RPC service to
rpc_address: localhost
# port for Thrift to listen on
rpc_port: 9160

# Frame size for thrift (maximum field length).
# 0 disables TFramedTransport in favor of TSocket. 
thrift_framed_transport_size_in_mb: 15

# The max length of a thrift message, including all fields and
# internal thrift overhead.
thrift_max_message_length_in_mb: 16

# NOTE(review): presumably controls whether a snapshot is taken before
# each compaction -- confirm against the Cassandra documentation.
snapshot_before_compaction: false

# The threshold size in megabytes the binary memtable must grow to,
# before it's submitted for flushing to disk.
binary_memtable_throughput_in_mb: 256
# The maximum time to leave a dirty memtable unflushed.
# (While any affected columnfamilies have unflushed data from a
# commit log segment, that segment cannot be deleted.)
# This needs to be large enough that it won't cause a flush storm
# of all your memtables flushing at once because none has hit
# the size or count thresholds yet.
memtable_flush_after_mins: 60
# Size of the memtable in memory before it is flushed
memtable_throughput_in_mb: 64
# Number of objects in millions in the memtable before it is flushed
memtable_operations_in_millions: 0.3

# NOTE(review): presumably the row size (in KB) after which column
# indexes are added to a row -- confirm.
column_index_size_in_kb: 64

# NOTE(review): presumably the largest row size (in MB) that will be
# compacted in memory -- confirm.
in_memory_compaction_limit_in_mb: 64

# commit log
commitlog_directory: /var/lib/cassandra/commitlog

# Size to allow commitlog to grow to before creating a new segment 
commitlog_rotation_threshold_in_mb: 128

# commitlog_sync may be either "periodic" or "batch".
# When in batch mode, Cassandra won't ack writes until the commit log
# has been fsynced to disk.  It will wait up to
# commitlog_sync_batch_window_in_ms milliseconds for other writes, before
# performing the sync.
commitlog_sync: periodic

# the other option is "periodic", where writes may be acked immediately
# and the CommitLog is simply synced every commitlog_sync_period_in_ms
# milliseconds.
commitlog_sync_period_in_ms: 10000

# Time to wait for a reply from other nodes before failing the command 
rpc_timeout_in_ms: 10000

# phi value that must be reached for a host to be marked down.
# most users should never need to adjust this.
# phi_convict_threshold: 8

# endpoint_snitch -- Set this to a class that implements
# IEndpointSnitch, which will let Cassandra know enough
# about your network topology to route requests efficiently.
# Out of the box, Cassandra provides
# org.apache.cassandra.locator.SimpleSnitch,
# org.apache.cassandra.locator.RackInferringSnitch, and
# org.apache.cassandra.locator.PropertyFileSnitch.
endpoint_snitch: org.apache.cassandra.locator.SimpleSnitch

# dynamic_snitch -- This boolean controls whether the above snitch is
# wrapped with a dynamic snitch, which will monitor read latencies
# and avoid reading from hosts that have slowed (due to compaction,
# for instance)
dynamic_snitch: true

# request_scheduler -- Set this to a class that implements
# RequestScheduler, which will schedule incoming client requests
# according to the specific policy. This is useful for multi-tenancy
# with a single Cassandra cluster.
# NOTE: This is specifically for requests from the client and does
# not affect inter node communication.
# org.apache.cassandra.scheduler.NoScheduler - No scheduling takes place
# org.apache.cassandra.scheduler.RoundRobinScheduler - Round robin of
# client requests to a node with a separate queue for each
# request_scheduler_id. The requests are throttled based on the limit set
# in throttle_limit in the request_scheduler_options
request_scheduler: org.apache.cassandra.scheduler.NoScheduler

# Scheduler Options vary based on the type of scheduler
# NoScheduler - Has no options
# RoundRobin
#  - throttle_limit -- The throttle_limit is the number of in-flight
#                      requests per client.  Requests beyond 
#                      that limit are queued up until
#                      running requests can complete.
#                      The value of 80 here is twice the number of
#                      concurrent_reads + concurrent_writes.
# request_scheduler_options:
#    throttle_limit: 80

# request_scheduler_id -- An identifier based on which to perform
# the request scheduling. The current supported option is "keyspace"
request_scheduler_id: keyspace

# A ColumnFamily is the Cassandra concept closest to a relational table. 
#
# Keyspaces are separate groups of ColumnFamilies.  Except in very
# unusual circumstances you will have one Keyspace per application.
#
# Keyspace required parameters:
# - name: name of the keyspace; "system" and "definitions" are 
#   reserved for Cassandra Internals.
# - replica_placement_strategy: the class that determines how replicas
#   are distributed among nodes. Contains both the class as well as
#   configuration information.  Must extend AbstractReplicationStrategy.
#   Out of the box, Cassandra provides 
#     * org.apache.cassandra.locator.RackUnawareStrategy 
#     * org.apache.cassandra.locator.RackAwareStrategy
#     * org.apache.cassandra.locator.DatacenterShardStrategy
#
#   RackUnawareStrategy is the simplest; it simply places the first
#   replica at the node whose token is closest to the key (as determined
#   by the Partitioner), and additional replicas on subsequent nodes
#   along the ring in increasing Token order.
# 
#   RackAwareStrategy is special cased for replication_factor of 3.  It
#   places one replica in each of two datacenters, and the third on a
#   different rack in the first.
#
#   DatacenterShardStrategy is a generalization of RackAwareStrategy.
#   For each datacenter, you can specify how many replicas you want
#   on a per-keyspace basis.  Replicas are placed on different racks
#   within each DC, if possible. This strategy also requires rack aware
#   snitch, such as RackInferringSnitch or PropertyFileSnitch.
#   An example:
#    - name: Keyspace1
#      replica_placement_strategy: org.apache.cassandra.locator.DatacenterShardStrategy
#      strategy_options:
#        DC1 : 3
#        DC2 : 2
#        DC3 : 1
# 
# - replication_factor: Number of replicas of each row
# - column_families: column families associated with this keyspace
#
# ColumnFamily required parameters:
# - name: name of the ColumnFamily.  Must not contain the character "-".
# - compare_with: tells Cassandra how to sort the columns for slicing
#   operations. The default is BytesType, which is a straightforward
#   lexical comparison of the bytes in each column.  Other options are
#   AsciiType, UTF8Type, LexicalUUIDType, TimeUUIDType, LongType,
#   and IntegerType (a generic variable-length integer type).
#   You can also specify the fully-qualified class name to a class of
#   your choice extending org.apache.cassandra.db.marshal.AbstractType.
#
# ColumnFamily optional parameters:
# - keys_cached: specifies the number of keys per sstable whose
#   locations we keep in memory in "mostly LRU" order.  (JUST the key
#   locations, NOT any column values.) Specify a fraction (value less
#   than 1) or an absolute number of keys to cache.  Defaults to 200000
#   keys.
# - rows_cached: specifies the number of rows whose entire contents we
#   cache in memory. Do not use this on ColumnFamilies with large rows,
#   or ColumnFamilies with high write:read ratios. Specify a fraction
#   (value less than 1) or an absolute number of rows to cache.
#   Defaults to 0. (i.e. row caching is off by default)
# - comment: used to attach additional human-readable information about 
#   the column family to its definition.
# - read_repair_chance: specifies the probability with which read
#   repairs should be invoked on non-quorum reads.  must be between 0
#   and 1. defaults to 1.0 (always read repair).
# - preload_row_cache: If true, will populate row cache on startup.
#   Defaults to false.
# - gc_grace_seconds: specifies the time to wait before garbage
#   collecting tombstones (deletion markers). defaults to 864000 (10
#   days). See http://wiki.apache.org/cassandra/DistributedDeletes
#
# NOTE: this keyspace definition is for demonstration purposes only.
#       Cassandra will not load these definitions during startup. See
#       http://wiki.apache.org/cassandra/FAQ#no_keyspaces for an explanation.
keyspaces:
    # Single demonstration keyspace: one replica per row, placed without
    # regard to rack or datacenter.
    - name: Keyspace1
      replica_placement_strategy: org.apache.cassandra.locator.RackUnawareStrategy
      replication_factor: 1
      column_families:
        # Minimal definition: only the required name and comparator.
        - name: Standard1
          compare_with: BytesType

        # Columns sorted as UTF8 strings; read repair on 10% of
        # non-quorum reads, 100 cached keys per sstable, and no grace
        # period before tombstones may be garbage collected.
        - name: Standard2
          compare_with: UTF8Type
          read_repair_chance: 0.1
          keys_cached: 100
          gc_grace_seconds: 0

        # Columns sorted as time-based UUIDs.
        # NOTE(review): clock_type and reconciler are not described in
        # the parameter list in this file -- confirm they are supported.
        - name: StandardByUUID1
          compare_with: TimeUUIDType
          clock_type: Timestamp
          reconciler: TimestampReconciler

        # Super column family with byte-wise ordering at both levels.
        - name: Super1
          column_type: Super
          compare_with: BytesType
          compare_subcolumns_with: BytesType

        # Super column family with UTF8 ordering at both levels.
        # compare_with is set explicitly because the default comparator
        # is BytesType, which would contradict the comment below.
        - name: Super2
          column_type: Super
          compare_with: UTF8Type
          compare_subcolumns_with: UTF8Type
          preload_row_cache: true
          rows_cached: 10000
          keys_cached: 50
          comment: 'A column family with supercolumns, whose column and subcolumn names are UTF8 strings'

        # Super column family whose column names sort as longs;
        # compare_subcolumns_with is not set here.
        - name: Super3
          column_type: Super
          compare_with: LongType
          comment: 'A column family with supercolumns, whose column names are Longs (8 bytes)'