Prometheus output update, documentation in README
parent c982038c53
commit 5461af2902

README.md: 149 lines changed
@@ -1,3 +1,147 @@
# Lobby - simple server/service discovery service

In one of our projects we needed service discovery that doesn't require a complicated setup just to share
simple information about running services and check whether they are still alive. So we came up with
this small service we call Lobby. It's like a lobby in games, but in this case it's full of servers. Each
server runs one or more instances of the lobby daemon and regularly sends out how it's configured.

We call the information about the server and the services running on it *labels*. Every server shares
a "discovery packet", which is basically a JSON document that looks like this:

```json
{
    "hostname": "smtp.example.com",
    "labels": [
        "service:smtp",
        "public_ip4:1.2.3.4",
        "public_ip6:2a03::1"
    ],
    "last_check": 1630612478
}
```

The packet contains the server's hostname and a list of labels describing what's running on it
and what its IP addresses are. What you put in the labels is completely up to you, but some use
cases (for example the Node Exporter API endpoint) expect the "NAME:VALUE" format.

The labels can be configured via environment variables but also as files located in
*/etc/lobby/labels* (the path is configurable), so they can change dynamically.
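
For example, a hypothetical label file setup might look like this (the directory comes from
*LABELS_PATH*, the filenames are arbitrary, one label per line):

```shell
# Hypothetical example: lobbyd reads every file in LABELS_PATH, one label per line
mkdir -p /etc/lobby/labels
echo "service:smtp" > /etc/lobby/labels/service
echo "location:prague" > /etc/lobby/labels/location
```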

When everything is running, just point your favorite HTTP client at "http://localhost:1313/"
on any of the running instances and lobby returns a list of all available servers and
their labels. You can hook it into Prometheus, deployment scripts, CI/CD automation or,
for example, an internal system that sends emails and needs to know where the SMTP server is.
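
A quick check with curl (assuming the default host and port):

```shell
# Returns the list of discovered servers and their labels
curl http://localhost:1313/
```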

Lobby doesn't care whether you have one or a thousand instances of it running. Each instance
connects to a common point, which in this case is a [NATS server](https://nats.io/). NATS is a super fast and reliable
messaging system that handles the communication part as well as the high availability part.
NATS is easy to run and it offloads a huge part of the problem from lobby itself.
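
If you don't have a NATS server around yet, a quick way to try one locally is the official
Docker image (just for experimenting, production setups will differ):

```shell
# Throw-away NATS instance listening on the default port 4222
docker run -d --name nats-test -p 4222:4222 nats:latest
```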

The code is open to supporting multiple backends, and it's not that hard to add a new one.

## Quickstart guide

The quickest way to run lobbyd on your server is this:

```shell
wget -O /usr/local/bin/lobbyd https://....
chmod +x /usr/local/bin/lobbyd

# Update NATS_URL and LABELS here
cat << EOF > /etc/systemd/system/lobbyd.service
[Unit]
Description=Server Lobby service
After=network.target

[Service]
Environment="NATS_URL=tls://nats.example.com:4222"
Environment="LABELS=service:ns,ns:primary,public_ip4:1.2.3.4,public_ip6:2a03::1,location:prague"
ExecStart=/usr/local/bin/lobbyd
PrivateTmp=false

[Install]
WantedBy=multi-user.target
EOF

systemctl daemon-reload
systemctl start lobbyd
systemctl enable lobbyd
```
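
To verify it came up, a quick sketch:

```shell
# Check that the daemon started and look at its recent log output
systemctl status lobbyd
journalctl -u lobbyd --since "5 minutes ago"
```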

If you run lobbyd in production, consider creating a dedicated system user and group for it and adding
both to this service file. It hardly needs to access anything in your system.
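
A possible sketch (the user and group names are just an example):

```shell
# Create an unprivileged system user for the daemon
useradd --system --no-create-home --shell /usr/sbin/nologin lobby

# and reference it in the [Service] section of lobbyd.service:
#   User=lobby
#   Group=lobby
```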

## Daemon

There are more config directives you can use to fine-tune lobbyd to do exactly what you need
(a client-only example follows the table).

| Environment variable   | Type   | Default           | Required | Note                                                                                                                                                     |
| ---------------------- | ------ | ----------------- | -------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- |
| TOKEN                  | string |                   | no       | Authentication token for the API; if empty, auth is disabled                                                                                             |
| HOST                   | string | 127.0.0.1         | no       | IP address the REST server listens on                                                                                                                    |
| PORT                   | int    | 1313              | no       | Port related to the address above                                                                                                                        |
| NATS_URL               | string |                   | yes      | NATS URL used to connect to the NATS server                                                                                                              |
| NATS_DISCOVERY_CHANNEL | string | lobby.discovery   | no       | Channel where the keep-alive packets are sent                                                                                                            |
| LABELS                 | string |                   | no       | List of labels, separated by commas                                                                                                                      |
| LABELS_PATH            | string | /etc/lobby/labels | no       | Path where filesystem-based labels are located, one label per line; the filename is not important for lobby                                              |
| HOSTNAME               | string |                   | no       | Override the local machine's hostname                                                                                                                    |
| CLEAN_EVERY            | int    | 15                | no       | How often to clean the list of discovered servers to get rid of the ones that are no longer alive [secs]                                                 |
| KEEP_ALIVE             | int    | 5                 | no       | How often to send the keep-alive discovery message with all available information [secs]                                                                 |
| TTL                    | int    | 30                | no       | After how many seconds a discovery record is considered invalid                                                                                          |
| NODE_EXPORTER_PORT     | int    | 9100              | no       | Default port where node_exporter listens on all registered servers; used when the special Prometheus labels don't contain a port                         |
| REGISTER               | bool   | true              | no       | If true (default), the local instance registers itself with the other instances (the discovery packet is sent regularly); if false, the daemon runs only as a client |
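
For example, a read-only client that only queries what other instances report (a sketch, values are illustrative):

```shell
# Client-only mode: connect to NATS but don't register this machine
NATS_URL=tls://nats.example.com:4222 REGISTER=false lobbyd
```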

### Service discovery for Prometheus

Lobbyd has an API endpoint that returns a list of targets for [Prometheus's HTTP SD config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#http_sd_config). That
allows you to use lobbyd to configure Prometheus dynamically, based on the servers that are running. There is a special kind of label that is used to set the output for Prometheus properly.
Let's check this:

    prometheus:nodeexporter:host:192.168.1.1
    prometheus:nodeexporter:port:9100

When you open the URL http://localhost:1313/v1/prometheus/nodeexporter, it returns this:

```json
[
    {
        "Labels": {
            "location": "prague"
        },
        "Targets": [
            "192.168.1.1:9100"
        ]
    }
]
```

"nodeexporter" can be anything you want. It determines the name of the monitored service, i.e. the service that provides the */metrics* endpoint.

There is also a minimal way to add a server to the prometheus output. Simply set the label *prometheus:nodeexporter* and it
will use the default port from the environment variable above and the hostname of the server:

```json
[
    {
        "Labels": {},
        "Targets": [
            "192.168.1.1:9100"
        ]
    }
]
```

At least one prometheus label has to be set for the monitoring service to be exported in the prometheus output.

## REST API

So far the REST API is super simple and it has only a few endpoints (usage examples below):

    GET /                        # Returns a list of all discovered servers and their labels.
    GET /v1/                     # Same as /
    GET /v1/prometheus/:name     # Generates output for Prometheus's HTTP SD config; name is the group of monitoring services described above.
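
Based on the handler code in this commit, the root endpoint also accepts a comma-separated *labels* query
parameter for filtering. A quick sketch with curl (token authentication omitted, since its exact form
isn't documented here):

```shell
# Only servers carrying at least one of the given labels
curl "http://localhost:1313/?labels=service:smtp,location:prague"

# Prometheus HTTP SD output for the "nodeexporter" group
curl http://localhost:1313/v1/prometheus/nodeexporter
```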

## TODO

* [X] filtering based on labels
@@ -9,7 +153,8 @@
One file per one label
* [X] Deregistration
* [X] Deregister when the daemon exits
* [ ] Separate the NATS code so it can support multiple backend/drivers
* [ ] Documentation
* [X] Separate the NATS code so it can support multiple backend/drivers
* [X] Documentation
* [ ] Tests
* [ ] Command hooks - script or list of scripts that are triggered when discovery status has changed
@@ -12,10 +12,9 @@ type Config struct {
	Host string `envconfig:"HOST" required:"false" default:"127.0.0.1"` // IP address used for the REST server to listen
	Port uint16 `envconfig:"PORT" required:"false" default:"1313"` // Port related to the address above
	NATSURL string `envconfig:"NATS_URL" required:"true"` // NATS URL used to connect to the NATS server
	NATSDiscoveryChannel string `envconfig:"NATS_DISCOVERY_CHANNEL" required:"true" default:"lobby.discovery"` // Channel where the keep-alive packets are sent
	NATSDiscoveryChannel string `envconfig:"NATS_DISCOVERY_CHANNEL" required:"false" default:"lobby.discovery"` // Channel where the keep-alive packets are sent
	Labels []string `envconfig:"LABELS" required:"false" default:""` // List of labels
	LabelsPath string `envconfig:"LABELS_PATH" required:"false" default:"/etc/lobby/labels"` // Path where filesystem based labels are located
	// TemplatesPath string `envconfig:"TEMPLATES_PATH" required:"false" default:"/etc/lobby/templates"` // Path where templates are stored for custom output
	HostName string `envconfig:"HOSTNAME" required:"false"` // Override local machine's hostname
	CleanEvery uint `envconfig:"CLEAN_EVERY" required:"false" default:"15"` // How often to clean the list of servers to get rid of the not alive ones
	KeepAlive uint `envconfig:"KEEP_ALIVE" required:"false" default:"5"` // how often to send the keepalive message with all available information [secs]
@@ -2,18 +2,20 @@ package main

import (
	"net/http"
	"strings"

	"github.com/labstack/echo"
	"github.com/rosti-cz/server_lobby/server"
)

func listHandler(c echo.Context) error {
	label := c.QueryParam("label")
	labels := c.QueryParam("labels")

	var discoveries []server.Discovery

	if len(label) > 0 {
		discoveries = discoveryStorage.Filter(label)
	if len(labels) > 0 {
		labelsFilterSlice := strings.Split(labels, ",")
		discoveries = discoveryStorage.Filter(labelsFilterSlice)
	} else {
		discoveries = discoveryStorage.GetAll()
	}
@@ -22,7 +24,9 @@ func listHandler(c echo.Context) error {
}

func prometheusHandler(c echo.Context) error {
	services := preparePrometheusOutput(discoveryStorage.GetAll())
	name := c.Param("name")

	services := preparePrometheusOutput(name, discoveryStorage.GetAll())

	return c.JSONPretty(http.StatusOK, services, " ")
}
@@ -141,7 +141,7 @@ func main() {
	// Routes
	e.GET("/", listHandler)
	e.GET("/v1/", listHandler)
	e.GET("/v1/prometheus", prometheusHandler)
	e.GET("/v1/prometheus/:name", prometheusHandler)

	// e.GET("/template/:template", func(c echo.Context) error {
	// 	templateName := c.Param("template")
@@ -29,17 +29,18 @@ type PrometheusService struct {
// preparePrometheusOutput returns PrometheusServices which is struct compatible to what Prometheus expects
// labels starting "ne:" will be used as NodeExporter labels. Label "ne:port:9123" will be used as port
// used in the targets field. Same for "ne:host:1.2.3.4".
func preparePrometheusOutput(discoveries []server.Discovery) PrometheusServices {
func preparePrometheusOutput(name string, discoveries []server.Discovery) PrometheusServices {
	services := PrometheusServices{}

	for _, discovery := range discoveries {
		port := strconv.Itoa(int(config.NodeExporterPort))
		host := discovery.Hostname
		var add bool // add to the prometheus output when there is at least one prometheus related label

		labels := map[string]string{}

		for _, label := range discovery.FindLabels("ne") {
			trimmed := strings.TrimPrefix(label, "ne:")
		for _, label := range discovery.FindLabels("prometheus:" + name) {
			trimmed := strings.TrimPrefix(label, "prometheus:"+name+":")
			parts := strings.SplitN(trimmed, ":", 2)
			if len(parts) == 2 {
				if parts[0] == "port" {
@@ -49,15 +50,28 @@ func preparePrometheusOutput(discoveries []server.Discovery) PrometheusServices
				} else {
					labels[parts[0]] = parts[1]
				}
				add = true
			}
		}

		// This has to be checked here again because FindLabels adds : at the end of the label name.
		if !add {
			for _, label := range discovery.Labels {
				if label == "prometheus:"+name {
					add = true
					break
				}
			}
		}

		if add {
			service := PrometheusService{
				Targets: []string{host + ":" + port},
				Labels:  labels,
			}

			services = append(services, service)
		}

	}
@@ -147,14 +147,22 @@ func (d *Discoveries) GetAll() []Discovery {
	return d.activeServers
}

func (d *Discoveries) Filter(labelFilter string) []Discovery {
func (d *Discoveries) Filter(labelsFilter []string) []Discovery {
	newSet := []Discovery{}

	if len(labelFilter) > 0 {
	var found bool
	if len(labelsFilter) > 0 {
		for _, discovery := range d.activeServers {
			found = false
			for _, label := range discovery.Labels {
				for _, labelFilter := range labelsFilter {
					if label == labelFilter {
						newSet = append(newSet, discovery)
						found = true
						break
					}
				}
				if found {
					break
				}
			}