diff --git a/README.md b/README.md index d053392..952b8ba 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,147 @@ +# Lobby - simple server/service discovery service + +In one of our projects we needed service discovery that doesn't need a complicated setup just to share +simple information about running services and to check if they are still alive. So we came up with +this small service we call Lobby. It's like a lobby in games but in this case there are servers. Each +server runs one or more instances of the lobby daemon, which regularly sends out how it's configured. + +We call the information about the server and services running on it *labels*. Every server shares +a "discovery packet" which is basically a JSON document that looks like this: + +```json +{ + "hostname": "smtp.example.com", + "labels": [ + "service:smtp", + "public_ip4:1.2.3.4", + "public_ip6:2a03::1" + ], + "last_check": 1630612478 +} +``` + +The packet contains the server's hostname and a list of labels describing +what's running on it and what its IP addresses are. What's in the labels is completely up to you +but in some use-cases (Node Exporter API endpoint) lobby expects the "NAME:VALUE" format. + +The labels can be configured via environment variables but also as files located in +*/etc/lobby/labels* (configurable path) so they can change dynamically. + +When everything is running just call your favorite http client against "http://localhost:1313/" +on any of the running instances and lobby returns a list of all available servers and +their labels. You can hook it to Prometheus, deployment scripts, CI/CD automations or +your internal system that sends emails and needs to know where the SMTP server is, for +example. + +Lobby doesn't care if you have one or a thousand instances of it running. Each instance +is connected to a common point which is a [NATS server](https://nats.io/) in this case.
NATS is a super fast and reliable +messaging system which handles the communication part but also the high availability part. +NATS is easy to run and it offloads a huge part of the problem from lobby itself. + +The code is open to support multiple backends and it's not that hard to add a new one. + +## Quickstart guide + +The quickest way to run lobbyd on your server is this: + +```shell +wget -O /usr/local/bin/lobbyd https://.... +chmod +x /usr/local/bin/lobbyd + +# Update NATS_URL and LABELS here +cat << EOF > /etc/systemd/system/lobbyd.service +[Unit] +Description=Server Lobby service +After=network.target + +[Service] +Environment="NATS_URL=tls://nats.example.com:4222" +Environment="LABELS=service:ns,ns:primary,public_ip4:1.2.3.4,public_ip6:2a03::1,location:prague" +ExecStart=/usr/local/bin/lobbyd +PrivateTmp=false + +[Install] +WantedBy=multi-user.target +EOF + +systemctl daemon-reload +systemctl start lobbyd +systemctl enable lobbyd +``` + +If you run lobbyd in production, consider creating a dedicated system user and group for it and adding both to this +service file. It needs access to almost nothing in your system. + +## Daemon + +There are other config directives you can use to fine-tune lobbyd to exactly what you need.
+ +| Environment variable | Type | Default | Required | Note | +| ---------------------- | ------ | ----------------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------- | +| TOKEN | string | | no | Authentication token for API, if empty auth is disabled | +| HOST | string | 127.0.0.1 | no | IP address used for the REST server to listen | +| PORT | int | 1313 | no | Port related to the address above | +| NATS_URL | string | | yes | NATS URL used to connect to the NATS server | +| NATS_DISCOVERY_CHANNEL | string | lobby.discovery | no | Channel where the keep-alive packets are sent | +| LABELS | string | | no | List of labels, labels should be separated by comma | +| LABELS_PATH | string | /etc/lobby/labels | no | Path where filesystem based labels are located, one label per line, filename is not important for lobby | +| HOSTNAME | string | | no | Override local machine's hostname | +| CLEAN_EVERY | int | 15 | no | How often to clean the list of discovered servers to get rid of the not alive ones [secs] | +| KEEP_ALIVE | int | 5 | no | how often to send the keep-alive discovery message with all available information [secs] | +| TTL | int | 30 | no | After how many secs is discovery record considered as invalid | +| NODE_EXPORTER_PORT | int | 9100 | no | Default port where node_exporter listens on all registered servers, this is used when the special prometheus labels doesn't contain port | +| REGISTER | bool | true | no | If true (default) then local instance is registered with other instance (discovery packet is sent regularly), if false the daemon runs only as a client | + +### Service discovery for Prometheus + +Lobbyd has an API endpoint that returns list of targets for [Prometheus's HTTP SD config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#http_sd_config). 
That +allows you to use lobbyd to configure Prometheus dynamically based on running servers. There is a special kind of label that is used to set the output for Prometheus properly. +Let's check this: + + prometheus:nodeexporter:host:192.168.1.1 + prometheus:nodeexporter:port:9100 + +When you open URL http://localhost:1313/v1/prometheus/nodeexporter it returns this (the "location" entry comes from an additional *prometheus:nodeexporter:location:prague* label): + +```json +[ + { + "Labels": { + "location": "prague" + }, + "Targets": [ + "192.168.1.1:9100" + ] + } +] +``` + +"nodeexporter" can be anything you want. It determines the name of the monitored service, the service that provides the */metrics* endpoint. + +There is also a minimal way to add a server to the prometheus output. Simply set the label *prometheus:nodeexporter* and it +will use the default port from the NODE_EXPORTER_PORT environment variable above and the hostname of the server: + +```json +[ + { + "Labels": {}, + "Targets": [ + "192.168.1.1:9100" + ] + } +] +``` + +At least one prometheus label has to be set to export the monitoring service in the prometheus output. + +## REST API + +So far the REST API is super simple and it has only three endpoints: + + GET / # Returns list of all discovered servers and their labels. + GET /v1/ # Same as / + GET /v1/prometheus/:name # Generates output for Prometheus's SD config, name is the group of monitoring services described above.
+ ## TODO * [X] filtering based on labels @@ -9,7 +153,8 @@ One file per one label * [X] Deregistration * [X] Deregister when the daemon exits -* [ ] Separate the NATS code so it can support multiple backend/drivers -* [ ] Documentation +* [X] Separate the NATS code so it can support multiple backend/drivers +* [X] Documentation * [ ] Tests * [ ] Command hooks - script or list of scripts that are triggered when discovery status has changed + diff --git a/daemon/config.go b/daemon/config.go index 3fea3ee..87d4fa7 100644 --- a/daemon/config.go +++ b/daemon/config.go @@ -8,20 +8,19 @@ import ( // Config keeps info about configuration of this daemon type Config struct { - Token string `envconfig:"TOKEN" required:"false"` // Authentication token, if empty auth is disabled - Host string `envconfig:"HOST" required:"false" default:"127.0.0.1"` // IP address used for the REST server to listen - Port uint16 `envconfig:"PORT" required:"false" default:"1313"` // Port related to the address above - NATSURL string `envconfig:"NATS_URL" required:"true"` // NATS URL used to connect to the NATS server - NATSDiscoveryChannel string `envconfig:"NATS_DISCOVERY_CHANNEL" required:"true" default:"lobby.discovery"` // Channel where the kepp alive packets are sent - Labels []string `envconfig:"LABELS" required:"false" default:""` // List of labels - LabelsPath string `envconfig:"LABELS_PATH" required:"false" default:"/etc/lobby/labels"` // Path where filesystem based labels are located - // TemplatesPath string `envconfig:"TEMPLATES_PATH" required:"false" default:"/etc/lobby/templates"` // Path where templates are stored for custom output - HostName string `envconfig:"HOSTNAME" required:"false"` // Overrise local machine's hostname - CleanEvery uint `envconfig:"CLEAN_EVERY" required:"false" default:"15"` // How often to clean the list of servers to get rid of the not alive ones - KeepAlive uint `envconfig:"KEEP_ALIVE" required:"false" default:"5"` // how often to send the keepalive message 
with all availabel information [secs] - TTL uint `envconfig:"TTL" required:"false" default:"30"` // After how many secs is discovery record considered as invalid - NodeExporterPort uint `envconfig:"NODE_EXPORTER_PORT" required:"false" default:"9100"` // Default port where node_exporter listens on all registered servers - Register bool `envconfig:"REGISTER" required:"false" default:"true"` // If true (default) then local instance is registered with other instance (discovery packet is sent regularly) + Token string `envconfig:"TOKEN" required:"false"` // Authentication token, if empty auth is disabled + Host string `envconfig:"HOST" required:"false" default:"127.0.0.1"` // IP address used for the REST server to listen + Port uint16 `envconfig:"PORT" required:"false" default:"1313"` // Port related to the address above + NATSURL string `envconfig:"NATS_URL" required:"true"` // NATS URL used to connect to the NATS server + NATSDiscoveryChannel string `envconfig:"NATS_DISCOVERY_CHANNEL" required:"false" default:"lobby.discovery"` // Channel where the kepp alive packets are sent + Labels []string `envconfig:"LABELS" required:"false" default:""` // List of labels + LabelsPath string `envconfig:"LABELS_PATH" required:"false" default:"/etc/lobby/labels"` // Path where filesystem based labels are located + HostName string `envconfig:"HOSTNAME" required:"false"` // Overrise local machine's hostname + CleanEvery uint `envconfig:"CLEAN_EVERY" required:"false" default:"15"` // How often to clean the list of servers to get rid of the not alive ones + KeepAlive uint `envconfig:"KEEP_ALIVE" required:"false" default:"5"` // how often to send the keepalive message with all availabel information [secs] + TTL uint `envconfig:"TTL" required:"false" default:"30"` // After how many secs is discovery record considered as invalid + NodeExporterPort uint `envconfig:"NODE_EXPORTER_PORT" required:"false" default:"9100"` // Default port where node_exporter listens on all registered servers + 
Register bool `envconfig:"REGISTER" required:"false" default:"true"` // If true (default) then local instance is registered with other instance (discovery packet is sent regularly) } // GetConfig return configuration created based on environment variables diff --git a/daemon/handlers.go b/daemon/handlers.go index 72b172b..70f49a9 100644 --- a/daemon/handlers.go +++ b/daemon/handlers.go @@ -2,18 +2,20 @@ package main import ( "net/http" + "strings" "github.com/labstack/echo" "github.com/rosti-cz/server_lobby/server" ) func listHandler(c echo.Context) error { - label := c.QueryParam("label") + labels := c.QueryParam("labels") var discoveries []server.Discovery - if len(label) > 0 { - discoveries = discoveryStorage.Filter(label) + if len(labels) > 0 { + labelsFilterSlice := strings.Split(labels, ",") + discoveries = discoveryStorage.Filter(labelsFilterSlice) } else { discoveries = discoveryStorage.GetAll() } @@ -22,7 +24,9 @@ func listHandler(c echo.Context) error { } func prometheusHandler(c echo.Context) error { - services := preparePrometheusOutput(discoveryStorage.GetAll()) + name := c.Param("name") + + services := preparePrometheusOutput(name, discoveryStorage.GetAll()) return c.JSONPretty(http.StatusOK, services, " ") } diff --git a/daemon/main.go b/daemon/main.go index 2d24aa4..062f56a 100644 --- a/daemon/main.go +++ b/daemon/main.go @@ -141,7 +141,7 @@ func main() { // Routes e.GET("/", listHandler) e.GET("/v1/", listHandler) - e.GET("/v1/prometheus", prometheusHandler) + e.GET("/v1/prometheus/:name", prometheusHandler) // e.GET("/template/:template", func(c echo.Context) error { // templateName := c.Param("template") diff --git a/daemon/prometheus.go b/daemon/prometheus.go index f9c7ff1..135ef2f 100644 --- a/daemon/prometheus.go +++ b/daemon/prometheus.go @@ -29,17 +29,18 @@ type PrometheusService struct { // preparePrometheusOutput returns PrometheusServices which is struct compatible to what Prometheus expects // labels starting "ne:" will be used as 
NodeExporter labels. Label "ne:port:9123" will be used as port // used in the targets field. Same for "ne:host:1.2.3.4". -func preparePrometheusOutput(discoveries []server.Discovery) PrometheusServices { +func preparePrometheusOutput(name string, discoveries []server.Discovery) PrometheusServices { services := PrometheusServices{} for _, discovery := range discoveries { port := strconv.Itoa(int(config.NodeExporterPort)) host := discovery.Hostname + var add bool // add to the prometheus output when there is at least one prometheus related label labels := map[string]string{} - for _, label := range discovery.FindLabels("ne") { - trimmed := strings.TrimPrefix(label, "ne:") + for _, label := range discovery.FindLabels("prometheus:" + name) { + trimmed := strings.TrimPrefix(label, "prometheus:"+name+":") parts := strings.SplitN(trimmed, ":", 2) if len(parts) == 2 { if parts[0] == "port" { @@ -49,15 +50,28 @@ func preparePrometheusOutput(discoveries []server.Discovery) PrometheusServices } else { labels[parts[0]] = parts[1] } + add = true } } - service := PrometheusService{ - Targets: []string{host + ":" + port}, - Labels: labels, + // This has to be checked here again because FindLabels adds : at the end of the label name. 
+ if !add { + for _, label := range discovery.Labels { + if label == "prometheus:"+name { + add = true + break + } + } } - services = append(services, service) + if add { + service := PrometheusService{ + Targets: []string{host + ":" + port}, + Labels: labels, + } + + services = append(services, service) + } } diff --git a/server/main.go b/server/main.go index cf5f639..06d8fef 100644 --- a/server/main.go +++ b/server/main.go @@ -147,14 +147,22 @@ func (d *Discoveries) GetAll() []Discovery { return d.activeServers } -func (d *Discoveries) Filter(labelFilter string) []Discovery { +func (d *Discoveries) Filter(labelsFilter []string) []Discovery { newSet := []Discovery{} - if len(labelFilter) > 0 { + var found bool + if len(labelsFilter) > 0 { for _, discovery := range d.activeServers { + found = false for _, label := range discovery.Labels { - if label == labelFilter { - newSet = append(newSet, discovery) + for _, labelFilter := range labelsFilter { + if label == labelFilter { + newSet = append(newSet, discovery) + found = true + break + } + } + if found { break } }