Configure

The bulk of Nyb’s behavior is configured through a TOML configuration file. In addition to that, the dependency graph is given in a JSON file.

The Nyb configuration file

In this configuration file, it is possible to use Go templates, and the Sprig functions are loaded. Go templates can be used to, e.g., expand environment variables in the configuration, either as part of values, or for conditionally setting up some sections.

The general form of the configuration file is given below. This configuration is customized to meet your needs when Nyb is deployed on your systems.

# The Graph section tells the incremental builder where to find and
# how to build the dependency graph.  The dependency graph lists all the
# build and test actions, and the order in which they must execute.
[Graph]
# Command is the optional program to call to produce the dependency graph.
# This program should write the dependency graph as a JSON file at the path
# Graph.File.  If Command is not given, no program is called, and the
# dependency graph is expected to already exist at Graph.File.
Command = ""
# Args is a slice of command-line arguments to pass to Command.
Args = []
# File is the path, relative to the workspace directory, of the JSON file
# that holds the dependency graph.
File = "nyb_graph.json"

# The Execution section configures how the incremental builder executes
# the build and test actions.
[Execution]
# WorkerName is a name used for reporting purposes.  It gives provenance
# info to the action results stored in cache.  WorkerName helps
# to identify the machine on which a given result was produced.
WorkerName = "local"
# MaxOpenFiles is the maximum number of files to open at the same time.
# On Linux and Mac OSX, this setting must be coherent with `ulimit -n`.
# The value below is rendered by the template engine: it is read from the
# MAX_OPEN_FILES environment variable, and falls back to 1000 when that
# variable is unset or empty.
MaxOpenFiles = {{ default "1000" (env "MAX_OPEN_FILES") }}
# HasRootLocallyPolicy is the policy to apply when checking whether the
# output root of an action exists locally.  This is used to know whether
# the output root must be downloaded again, and the action executed again,
# depending on the caching behavior for a rule.  "CheckDigests" compares
# the file digests of the local root with the file digests in the cache,
# whereas "CheckFilesExist" only looks at whether the output files and folders
# exist (they are the ones specified in the "outputs" field of the rule).
# "CheckFilesExist" is useful if the action is not reproducible and the caching
# behavior is set to "runOverCache" or "alwaysRun": this way, unwarranted
# executions are skipped.  However, "CheckFilesExist" trusts that no other
# process but Nyb changes the output files.  Otherwise, you might end up
# with inconsistencies.  Therefore, it's good to set HasRootLocallyPolicy to
# "CheckDigests" by default, unless unwarranted execution of actions is an
# important burden, in which case "CheckFilesExist" is preferable.  In the
# case of "CheckFilesExist", invalidation can only occur with the
# "--invalidate" or "--invalidate_all" command-line options.
HasRootLocallyPolicy = "CheckFilesExist"
# Pools is a map of tags to pool dimensions.  Pools restrict the number of
# rules with the given tags that can execute concurrently on the same worker.
# The "all" tag is implicitly given to all the rules, so it can be used
# to globally limit the concurrent number of actions to the system's
# dimensions.  The spec for the number of concurrent executions is either
# an integer, giving the number directly, or an expression "<float>cpus",
# giving the number as a floating-point multiple of the number of cores on
# the worker.
# The inline table is kept on a single line: TOML 1.0 does not allow
# newlines inside an inline table.
Pools = { all = "1cpus", tag_for_expensive_e2e_tests = "1" }
# Priority is a list of tags that must be executed in decreasing
# order of priority, when the scheduler has the choice between executing
# multiple actions.  This is useful, for instance, to instruct the scheduler
# to run build actions before unit tests, and unit tests before
# end-to-end tests.
Priority = ["build", "test", "docker"]
# ActionDigestsOutputFile is the optional path to a text file where all the
# action digests are written, one per line, along with the targets for the
# action digests.  This file can be used as an input when copying caches
# (see `nyb copy_cache`).
ActionDigestsOutputFile = "action_digests.txt"

# The Execution.Executor section configures the executor itself.  The executor
# is a program external to Nyb that is responsible for the actual execution
# of the actions; Nyb communicates with it via an RPC protocol.
[Execution.Executor]
# Command is the program for the executor.
Command = ""
# Args is a slice of command-line arguments to pass to Command.
Args = []

# Optionally, it is possible to set a global, distributed execution lock, to
# prevent multiple workers from executing the same action at the same time.
# Executing the same action at the same time is not a race condition, but it
# can lead to a waste of resources.  A global execution lock is especially
# useful in CI when one wants to use a distributed farm of workers
# to build a monorepo on multiple machines.
#
# The Execution.Lock.AzureCosmosDB section implements the execution lock as a
# collection on an Azure CosmosDB database accessible via a MongoDB interface.
# The collection must be set up with strong consistency for the lock to be
# effective.
[Execution.Lock.AzureCosmosDB]
# URI is the MongoDB URI to access CosmosDB.
URI = "mongodb://..."
# Database is the CosmosDB database.
Database = ""
# Collection is the CosmosDB collection within the database.
Collection = ""
# LockIDPrefix is a prefix to add to all the lock IDs.  The lock IDs are
# constructed from this prefix and the action digests.
LockIDPrefix = ""

# The Cache section implements execution caching.  With a cache, an action
# is executed again if and only if its digest is not found in the action
# cache.  Otherwise, the outputs of the action are downloaded, if needed,
# from a Content-Addressable Storage.
[Cache]
# Access is a slice listing the kinds of access Nyb has to the cache:
# "read" indicates that Nyb can read the cache, and "write" indicates that
# Nyb can write to the cache.
Access = ["read", "write"]

# The Cache.AzureStorage section implements caching on Azure Blob Storage.
# The action cache and the content-addressable storage for the outputs
# are implemented as virtual directories, and each action and output is
# stored as a separate block blob.
[Cache.AzureStorage]
# AccountName is the name of the Azure Storage account.
AccountName = ""
# AccountKey is the key for the Azure Storage account.
# NOTE(review): this value is a secret; consider injecting it from the
# environment through the template support (the env function) rather than
# committing it in clear text.
AccountKey = ""
# Container is the name of the Azure Storage container for the cache.
Container = ""
# BlobNamePrefix is a prefix, or "virtual directory", that is put before
# all the blob names.
BlobNamePrefix = ""

The Nyb dependency graph

The dependency graph is given to Nyb through a JSON file, whose path is in the configuration file. The format of the dependency graph is given below using the TypeScript type system:

/**
 * The dependency graph is expressed as a flat list of rules.
 */
type DependencyGraph = Array<Rule>;

/**
 * A rule gives instructions (the action) on how to build a target,
 * what the dependencies for this action are, and what the outputs
 * of this action are.  Additionally, the execution of the action can be
 * tuned with an execution policy.
 */
type Rule = {
  /**
   * target is the target built by this rule.
   */
  target: string;

  /**
   * tags are arbitrary labels the target is associated with.  Tags
   * can be used for selecting a subset of targets when invoking the
   * command line, or for grouping targets together and associating
   * some behavior to the group as a whole (such as with execution pools,
   * see the TOML configuration file).
   */
  tags?: string[];

  /**
   * dependsOn lists the targets the rule depends on.  The transitive outputs
   * of these dependencies are made available to the rule when it is
   * executed.  Loops are forbidden.
   */
  dependsOn?: string[];

  /**
   * sources lists the source files the rule depends on.  These source
   * files are checked in the VCS.  Typically, everything that is checked in
   * should be a source, and everything that is not checked in should be
   * listed as the output of a rule (it is possible to have undeclared
   * outputs, though this is not recommended).
   *
   * While it is possible to give a whole directory as an output, all
   * the items in the sources field must be files.
   *
   * A rule can have no source, in which case the caching behavior only
   * depends on the transitive dependencies.
   */
  sources?: { files: string[] };

  /**
   * steps are the steps for the action associated with the rule.
   * They give the instructions to follow when building the target.
   *
   * It is possible to not have any build step, in which case the rule only
   * has the effect of grouping source files and dependencies together.
   *
   * The steps are always executed serially for a given rule (but the
   * execution of the steps of different rules is concurrent).
   */
  steps?: Step[];

  /**
   * outputs lists the output files or directories the rule produces.
   * All these outputs are cached.  The steps can produce non-declared
   * output files (such as files outside the workspace, which is the
   * case with, e.g., mvn install), or influence the system in other
   * manners (such as when `docker build` adds a new image to the local
   * Docker daemon).  However, for better hermeticity and caching, this
   * non-declared behavior should be limited as much as possible.
   *
   * Contrary to more opinionated build systems such as Bazel, the outputs
   * can live alongside the source files.  In fact, there is no constraint
   * on the shape of the workspace.  This absence of opinion is crucial to
   * enable the use of existing toolchains "as is."
   *
   * A rule can have no output.
   */
  outputs?: string[];

  /**
   * executionPolicy modulates when the steps are executed.
   *
   * This modulation is important to accommodate the non-hermeticity of
   * some rules, and is the second important difference between this build
   * system and more opinionated build systems such as Bazel.
   *
   * The default execution policy is "cache".
   */
  executionPolicy?: ExecutionPolicy;

  /**
   * timeout is an optional timeout after which the steps are killed and
   * the action is marked as a failure.
   */
  timeout?: Timeout;
};

/**
 * A step is one unit of work carried out by the executor.  How each field
 * is interpreted is up to the executor, which is what lets Nyb adapt to
 * various toolchains.
 */
type Step = {
  /**
   * tool names the tool used by this step.  A tool groups several
   * commands; since a tool has a single version, its commands are all
   * versioned together.
   */
  tool: string;

  /**
   * command is the command to run within the tool.
   */
  command: string;

  /**
   * args maps argument names to values of any kind
   * (JSON numbers, strings, lists, objects, maps, ...).
   *
   * Arguments tune the behavior of the command.  They can carry
   * source files, output files, and any build or test flag specific
   * to the rule.
   */
  args: Record<string, any>;
};

/**
 * ExecutionPolicy changes when the steps of the action for a rule
 * are executed.  It is the type of the executionPolicy field of a rule.
 */
enum ExecutionPolicy {
  /**
   * cache is the default execution policy.  It means that the action result
   * is retrieved from cache if it is found, and in this case no further
   * execution takes place.
   */
  cache = "cache",

  /**
   * runOverCache involves executing the action again, even if the action
   * result is retrieved from cache.  If the outputs already exist in the
   * workspace, no further execution takes place.
   *
   * This policy is useful when some of the outputs are not declared but
   * used by dependent rules.  In this case, a "cache" policy would
   * download only the declared outputs, and the undeclared outputs would
   * be missing.
   *
   * An example is Maven.  `mvn install` builds an archive in
   * the "target" directory, then installs it in the local Maven repository,
   * outside of the workspace.
   *
   * A Maven rule could cache the outputs in the "target" directory, then
   * have a "runOverCache" policy to ensure the installation in the local
   * Maven repository.  Maven with a cached "target" directory is
   * faster if incremental compilation is turned on, so most of the benefits
   * of caching are still retained.
   */
  runOverCache = "runOverCache",

  /**
   * alwaysRun executes the action every time the outputs are
   * needed, even if the outputs already exist in the workspace.
   *
   * This policy is useful when some system-wide behavior occurs outside
   * of the file system.
   *
   * An example is Docker.  `docker build` creates an image that
   * is put in the local Docker daemon.  Because images take disk space,
   * `docker image prune` is a command that is often run.  If this command
   * is run and the action steps are not executed again, dependent rules
   * might rely on an image that no longer exists.  An "alwaysRun" policy
   * ensures that this would never be the case.  As `docker build` caches
   * the build steps, most of the benefits of Nyb caching are still retained.
   */
  alwaysRun = "alwaysRun",
}

/**
 * Timeout gives the timeout for the action steps.  The timeout applies
 * to the whole sequence of action steps for a rule, not to each of them
 * separately.
 *
 * The timeout enumeration is taken straight from Bazel.
 */
enum Timeout {
  /** short is a 1-minute timeout. */
  short = "short",

  /** moderate is a 5-minute timeout. */
  moderate = "moderate",

  /** long is a 15-minute timeout. */
  long = "long",

  /** eternal is a 1-hour timeout. */
  eternal = "eternal",
}