Skip to content

changing constraints results in "failed to place all allocations" #12016

@empikls

Description

@empikls

Nomad version

1.2.5

Nomad job example

# Nomad job spec that runs the nomad-autoscaler agent as a "system" job in the
# euc1 and fsn1 datacenters.
job "autoscaler" {
  type        = "system"
  datacenters = ["euc1", "fsn1"]

  # Only nodes whose node class matches this regular expression are eligible.
  constraint {
    attribute = "${node.class}"
    operator  = "regexp"
    value     = "(cloud-cpu-worker|object-detection)"
  }

  # distinct_property on the node's datacenter with value "1": per the Nomad
  # constraint documentation this caps placements at one allocation per
  # distinct ${node.datacenter} value.
  constraint {
    operator  = "distinct_property"
    attribute = "${node.datacenter}"
    value     = "1"
  }

  group "autoscaler" {
    # Bridge networking with three dynamic ports, all on the "private" host
    # network.
    network {
      mode = "bridge"
      port "http" {
        host_network = "private"
      }

      port "sidecar" {
        host_network = "private"
      }

      port "stats" {
        host_network = "private"
      }
    }

    # Registers the "autoscaler" service on the "http" port.
    service {
      name = "autoscaler"
      port = "http"

      # Publishes the address of the "stats" port as service metadata.
      meta {
        envoy_address = "${NOMAD_ADDR_stats}"
      }

      connect {
        sidecar_service {
          port = "sidecar"
          proxy {
            # Exposes /metrics from local port 9102 through the "stats"
            # listener port.
            expose {
              path {
                path            = "/metrics"
                protocol        = "http"
                local_path_port = 9102
                listener_port   = "stats"
              }
            }
            # Same port (9102) as local_path_port in the expose path above.
            config {
              envoy_prometheus_bind_addr = "0.0.0.0:9102"
            }
            # The "prometheus" upstream is bound locally on port 9091; the task
            # template reads it via NOMAD_UPSTREAM_ADDR_prometheus.
            upstreams {
              destination_name = "prometheus"
              local_bind_port  = 9091
            }
          }
        }
      }
      # HTTP health check against /v1/health every 3s with a 1s timeout.
      check {
        type     = "http"
        path     = "/v1/health"
        interval = "3s"
        timeout  = "1s"

        # Restart settings tied to this check: limit of 3, 30s grace period,
        # warnings are not ignored.
        check_restart {
          limit           = 3
          grace           = "30s"
          ignore_warnings = false
        }
      }
    }

    task "autoscaler" {
      driver = "docker"

      # Runs `nomad-autoscaler agent` from the 0.3.3 image, pointing it at the
      # rendered config file and binding HTTP to 0.0.0.0 on the "http" port.
      config {
        image   = "hashicorp/nomad-autoscaler:0.3.3"
        command = "nomad-autoscaler"
        ports   = ["http"]

        args = [
          "agent",
          "-config",
          "${NOMAD_TASK_DIR}/config.json",
          "-http-bind-address",
          "0.0.0.0",
          "-http-bind-port",
          "${NOMAD_PORT_http}",
        ]
      }

      # Renders the agent configuration to ${NOMAD_TASK_DIR}/config.json.
      # The data is built with jsonencode() and then replace(..., "\\", "")
      # removes every backslash from the encoded string.
      # NOTE(review): presumably this un-escapes the quotes inside the
      # {{ ... }} template actions so they stay valid template syntax - confirm.
      template {
        data = replace(jsonencode({
          nomad = {
            address = "http://{{ env \"attr.unique.network.ip-address\" }}:4646"
            token   = "{{ key \"secrets/nomad/autoscaler/token\" }}"
          }
          telemetry = {
            prometheus_metrics = true
            disable_hostname   = true
          }
          apm = {
            prometheus = {
              driver = "prometheus"
              config = {
                address = "http://{{ env \"NOMAD_UPSTREAM_ADDR_prometheus\" }}"
              }
            }
          }
          strategy = {
            target-value = {
              driver = "target-value"
            }
            pass-through = {
              driver = "pass-through"
            }
          }
        }), "\\", "")

        destination = "${NOMAD_TASK_DIR}/config.json"
      }

      resources {
        cpu    = 100
        memory = 256
      }
    }
  }
}

.gitlab-ci.yml

# Default container image for the pipeline's jobs (placeholder value).
image: image

# The pipeline defines a single stage, "deploy".
stages:
  - deploy

# Job template reused through `extends` by "deploy to qa-1".
# It is started manually and its failure fails the pipeline.
.deploy:
  when: manual
  allow_failure: false
  environment:
    name: $ENVIRONMENT
    url: some url
  script:
    - export SOME VARIABLE
    # Folded scalar: the lines below are joined with single spaces into one
    # shell command that runs `nomad job run` for every entry under jobs/.
    - >-
      for job in $(ls jobs/*); do
      echo "Deploying $job";
      nomad job run $job;
      done

# Concrete deploy job for the qa-1 environment; everything except the stage
# and the variables below comes from the .deploy template.
deploy to qa-1:
  extends: .deploy
  stage: deploy
  variables:
    # ENVIRONMENT feeds `environment.name` in .deploy.
    ENVIRONMENT: "qa-1"
    NOMAD_TOKEN: "$QA_1_NOMAD_TOKEN"

Issue

When deploying the job via GitLab, the pipeline job fails even though the allocations are already placed, and I cannot find any constraint that is not satisfied.
nomad-qa-2 nomad job status autoscaler

ID            = autoscaler
Name          = autoscaler
Submit Date   = 2022-02-07T15:27:27+02:00
Type          = system
Priority      = 50
Datacenters   = euc1,fsn1
Namespace     = default
Status        = running
Periodic      = false
Parameterized = false

Summary
Task Group  Queued  Starting  Running  Failed  Complete  Lost
autoscaler  0       0         2        0       0         0

Allocations
ID        Node ID   Task Group  Version  Desired  Status   Created     Modified
3880e81a  06f66f20  autoscaler  0        run      running  12m38s ago  12m36s ago
4314feca  58b148c7  autoscaler  0        run      running  12m38s ago  12m34s ago

Output from GitLab:

$ for job in $(ls jobs/*); do echo "Deploying $job"; nomad job run $job; done
Deploying jobs/autoscaler.nomad
==> Monitoring evaluation "11fb88cc"
    Evaluation triggered by job "autoscaler"
    Evaluation status changed: "pending" -> "complete"
==> Evaluation "11fb88cc" finished with status "complete" but failed to place all allocations:
    Task Group "autoscaler" (failed to place 1 allocation):
      * Class "cloud-cpu-worker": 1 nodes excluded by filter
      * Class "stream-processing": 1 nodes excluded by filter
      * Class "cloud-cache": 1 nodes excluded by filter
      * Class "cache": 1 nodes excluded by filter
      * Constraint "${node.class} regexp (cloud-cpu-worker|object-detection)": 3 nodes excluded by filter
      * Constraint "distinct_property: ${node.datacenter}=euc1 used by 1 allocs": 1 nodes excluded by filter
Cleaning up file based variables
00:02
ERROR: Job failed: exit code 1

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    Status

    Done

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions