Prometheus output update, documentation in README
This commit is contained in:
parent
c982038c53
commit
5461af2902
149
README.md
149
README.md
@ -1,3 +1,147 @@
|
|||||||
|
# Lobby - simple server/service discovery service
|
||||||
|
|
||||||
|
In one of our projects we needed service discovery that doesn't need a complicated setup just to share
|
||||||
|
simple information about running services and check whether they are still alive. So we came up with
|
||||||
|
this small service we call Lobby. It's like a lobby in games but in this case there are servers. Each
|
||||||
|
server runs one or more instances of the lobby daemon, which regularly sends out how it's configured.
|
||||||
|
|
||||||
|
We call the information about the server and services running on it *labels*. Every server shares
|
||||||
|
a "discovery packet" which is basically a JSON document that looks like this:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"hostname": "smtp.example.com",
|
||||||
|
"labels": [
|
||||||
|
"service:smtp",
|
||||||
|
"public_ip4:1.2.3.4",
|
||||||
|
"public_ip6:2a03::1"
|
||||||
|
],
|
||||||
|
"last_check": 1630612478
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
The packet contains the server's hostname followed by a list of labels describing
|
||||||
|
what's running on it and what its IP addresses are. What's in the labels is completely up to you
|
||||||
|
but in some use-cases (Node Exporter API endpoint) it expects "NAME:VALUE" format.
|
||||||
|
|
||||||
|
The labels can be configured via environment variables but also as files located in
|
||||||
|
*/etc/lobby/labels* (configurable path) so it can dynamically change.
|
||||||
|
|
||||||
|
When everything is running, just point your favorite HTTP client at "http://localhost:1313/"
|
||||||
|
on any of the running instances and lobby returns a list of all available servers and
|
||||||
|
their labels. You can hook it to Prometheus, deployment scripts, CI/CD automations or
|
||||||
|
your internal system that sends emails and needs to know where the SMTP server is, for
|
||||||
|
example.
|
||||||
|
|
||||||
|
Lobby doesn't care if you have one or a thousand instances of it running. Each instance
|
||||||
|
is connected to a common point which is a [NATS server](https://nats.io/) in this case. NATS is a super fast and reliable
|
||||||
|
messaging system which handles the communication part but also the high availability part.
|
||||||
|
NATS is easy to run and it offloads a huge part of the problem from lobby itself.
|
||||||
|
|
||||||
|
The code is open to support multiple backends and it's not that hard to add a new one.
|
||||||
|
|
||||||
|
## Quickstart guide
|
||||||
|
|
||||||
|
The quickest way to run lobbyd on your server is this:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
wget -O /usr/local/bin/lobbyd https://....
|
||||||
|
chmod +x /usr/local/bin/lobbyd
|
||||||
|
|
||||||
|
# Update NATS_URL and LABELS here
|
||||||
|
cat << EOF > /etc/systemd/system/lobbyd.service
|
||||||
|
[Unit]
|
||||||
|
Description=Server Lobby service
|
||||||
|
After=network.target
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Environment="NATS_URL=tls://nats.example.com:4222"
|
||||||
|
Environment="LABELS=service:ns,ns:primary,public_ip4:1.2.3.4,public_ip6:2a03::1,location:prague"
|
||||||
|
ExecStart=/usr/local/bin/lobbyd
|
||||||
|
PrivateTmp=false
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=multi-user.target
|
||||||
|
EOF
|
||||||
|
|
||||||
|
systemctl daemon-reload
|
||||||
|
systemctl start lobbyd
|
||||||
|
systemctl enable lobbyd
|
||||||
|
```
|
||||||
|
|
||||||
|
If you run lobbyd in production, consider creating a dedicated system user and group for it and adding both to this
|
||||||
|
service file. It needs access to almost nothing on your system.
|
||||||
|
|
||||||
|
## Daemon
|
||||||
|
|
||||||
|
There are other config directives you can use to fine-tune lobbyd to exactly what you need.
|
||||||
|
|
||||||
|
| Environment variable | Type | Default | Required | Note |
|
||||||
|
| ---------------------- | ------ | ----------------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
|
| TOKEN | string | | no | Authentication token for API, if empty auth is disabled |
|
||||||
|
| HOST | string | 127.0.0.1 | no | IP address used for the REST server to listen |
|
||||||
|
| PORT | int | 1313 | no | Port related to the address above |
|
||||||
|
| NATS_URL | string | | yes | NATS URL used to connect to the NATS server |
|
||||||
|
| NATS_DISCOVERY_CHANNEL | string | lobby.discovery | no | Channel where the keep-alive packets are sent |
|
||||||
|
| LABELS | string | | no | List of labels, labels should be separated by comma |
|
||||||
|
| LABELS_PATH | string | /etc/lobby/labels | no | Path where filesystem based labels are located, one label per line, filename is not important for lobby |
|
||||||
|
| HOSTNAME | string | | no | Override local machine's hostname |
|
||||||
|
| CLEAN_EVERY | int | 15 | no | How often to clean the list of discovered servers to get rid of the not alive ones [secs] |
|
||||||
|
| KEEP_ALIVE | int | 5 | no | how often to send the keep-alive discovery message with all available information [secs] |
|
||||||
|
| TTL | int | 30 | no | After how many secs is discovery record considered as invalid |
|
||||||
|
| NODE_EXPORTER_PORT | int | 9100 | no | Default port where node_exporter listens on all registered servers, this is used when the special prometheus labels don't contain a port |
|
||||||
|
| REGISTER | bool | true | no | If true (default) then local instance is registered with other instance (discovery packet is sent regularly), if false the daemon runs only as a client |
|
||||||
|
|
||||||
|
### Service discovery for Prometheus
|
||||||
|
|
||||||
|
Lobbyd has an API endpoint that returns a list of targets for [Prometheus's HTTP SD config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#http_sd_config). That
|
||||||
|
allows you to use lobbyd to configure Prometheus dynamically based on running servers. There is a special kind of label that is used to set the output for Prometheus properly.
|
||||||
|
Let's check this:
|
||||||
|
|
||||||
|
prometheus:nodeexporter:host:192.168.1.1
|
||||||
|
prometheus:nodeexporter:port:9100
|
||||||
|
|
||||||
|
When you open URL http://localhost:1313/v1/prometheus/nodeexporter it returns this:
|
||||||
|
|
||||||
|
```json
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"Labels": {
|
||||||
|
"location": "prague"
|
||||||
|
},
|
||||||
|
"Targets": [
|
||||||
|
"192.168.1.1:9100"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
"nodeexporter" can be anything you want. It determines the name of the monitored service, i.e. the service that provides the */metrics* endpoint.
|
||||||
|
|
||||||
|
There is also a minimal way to add a server to the Prometheus output. Simply set the label *prometheus:nodeexporter* and it
|
||||||
|
will use the default port from the environment variable above and the hostname of the server:
|
||||||
|
|
||||||
|
```json
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"Labels": {},
|
||||||
|
"Targets": [
|
||||||
|
"192.168.1.1:9100"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
At least one prometheus label has to be set to export the monitoring service in the prometheus output.
|
||||||
|
|
||||||
|
## REST API
|
||||||
|
|
||||||
|
So far the REST API is super simple and it has only three endpoints:
|
||||||
|
|
||||||
|
GET / # Returns list of all discovered servers and their labels.
|
||||||
|
GET /v1/ # Same as /
|
||||||
|
GET /v1/prometheus/:name # Generates output for Prometheus's SD config, name is the group of monitoring services described above.
|
||||||
|
|
||||||
## TODO
|
## TODO
|
||||||
|
|
||||||
* [X] filtering based on labels
|
* [X] filtering based on labels
|
||||||
@ -9,7 +153,8 @@
|
|||||||
One file per one label
|
One file per one label
|
||||||
* [X] Deregistration
|
* [X] Deregistration
|
||||||
* [X] Deregister when the daemon exits
|
* [X] Deregister when the daemon exits
|
||||||
* [ ] Separate the NATS code so it can support multiple backend/drivers
|
* [X] Separate the NATS code so it can support multiple backend/drivers
|
||||||
* [ ] Documentation
|
* [X] Documentation
|
||||||
* [ ] Tests
|
* [ ] Tests
|
||||||
* [ ] Command hooks - script or list of scripts that are triggered when discovery status has changed
|
* [ ] Command hooks - script or list of scripts that are triggered when discovery status has changed
|
||||||
|
|
||||||
|
@ -8,20 +8,19 @@ import (
|
|||||||
|
|
||||||
// Config keeps info about configuration of this daemon
|
// Config keeps info about configuration of this daemon
|
||||||
type Config struct {
|
type Config struct {
|
||||||
Token string `envconfig:"TOKEN" required:"false"` // Authentication token, if empty auth is disabled
|
Token string `envconfig:"TOKEN" required:"false"` // Authentication token, if empty auth is disabled
|
||||||
Host string `envconfig:"HOST" required:"false" default:"127.0.0.1"` // IP address used for the REST server to listen
|
Host string `envconfig:"HOST" required:"false" default:"127.0.0.1"` // IP address used for the REST server to listen
|
||||||
Port uint16 `envconfig:"PORT" required:"false" default:"1313"` // Port related to the address above
|
Port uint16 `envconfig:"PORT" required:"false" default:"1313"` // Port related to the address above
|
||||||
NATSURL string `envconfig:"NATS_URL" required:"true"` // NATS URL used to connect to the NATS server
|
NATSURL string `envconfig:"NATS_URL" required:"true"` // NATS URL used to connect to the NATS server
|
||||||
NATSDiscoveryChannel string `envconfig:"NATS_DISCOVERY_CHANNEL" required:"true" default:"lobby.discovery"` // Channel where the kepp alive packets are sent
|
NATSDiscoveryChannel string `envconfig:"NATS_DISCOVERY_CHANNEL" required:"false" default:"lobby.discovery"` // Channel where the kepp alive packets are sent
|
||||||
Labels []string `envconfig:"LABELS" required:"false" default:""` // List of labels
|
Labels []string `envconfig:"LABELS" required:"false" default:""` // List of labels
|
||||||
LabelsPath string `envconfig:"LABELS_PATH" required:"false" default:"/etc/lobby/labels"` // Path where filesystem based labels are located
|
LabelsPath string `envconfig:"LABELS_PATH" required:"false" default:"/etc/lobby/labels"` // Path where filesystem based labels are located
|
||||||
// TemplatesPath string `envconfig:"TEMPLATES_PATH" required:"false" default:"/etc/lobby/templates"` // Path where templates are stored for custom output
|
HostName string `envconfig:"HOSTNAME" required:"false"` // Overrise local machine's hostname
|
||||||
HostName string `envconfig:"HOSTNAME" required:"false"` // Overrise local machine's hostname
|
CleanEvery uint `envconfig:"CLEAN_EVERY" required:"false" default:"15"` // How often to clean the list of servers to get rid of the not alive ones
|
||||||
CleanEvery uint `envconfig:"CLEAN_EVERY" required:"false" default:"15"` // How often to clean the list of servers to get rid of the not alive ones
|
KeepAlive uint `envconfig:"KEEP_ALIVE" required:"false" default:"5"` // how often to send the keepalive message with all availabel information [secs]
|
||||||
KeepAlive uint `envconfig:"KEEP_ALIVE" required:"false" default:"5"` // how often to send the keepalive message with all availabel information [secs]
|
TTL uint `envconfig:"TTL" required:"false" default:"30"` // After how many secs is discovery record considered as invalid
|
||||||
TTL uint `envconfig:"TTL" required:"false" default:"30"` // After how many secs is discovery record considered as invalid
|
NodeExporterPort uint `envconfig:"NODE_EXPORTER_PORT" required:"false" default:"9100"` // Default port where node_exporter listens on all registered servers
|
||||||
NodeExporterPort uint `envconfig:"NODE_EXPORTER_PORT" required:"false" default:"9100"` // Default port where node_exporter listens on all registered servers
|
Register bool `envconfig:"REGISTER" required:"false" default:"true"` // If true (default) then local instance is registered with other instance (discovery packet is sent regularly)
|
||||||
Register bool `envconfig:"REGISTER" required:"false" default:"true"` // If true (default) then local instance is registered with other instance (discovery packet is sent regularly)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetConfig return configuration created based on environment variables
|
// GetConfig return configuration created based on environment variables
|
||||||
|
@ -2,18 +2,20 @@ package main
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"strings"
|
||||||
|
|
||||||
"github.com/labstack/echo"
|
"github.com/labstack/echo"
|
||||||
"github.com/rosti-cz/server_lobby/server"
|
"github.com/rosti-cz/server_lobby/server"
|
||||||
)
|
)
|
||||||
|
|
||||||
func listHandler(c echo.Context) error {
|
func listHandler(c echo.Context) error {
|
||||||
label := c.QueryParam("label")
|
labels := c.QueryParam("labels")
|
||||||
|
|
||||||
var discoveries []server.Discovery
|
var discoveries []server.Discovery
|
||||||
|
|
||||||
if len(label) > 0 {
|
if len(labels) > 0 {
|
||||||
discoveries = discoveryStorage.Filter(label)
|
labelsFilterSlice := strings.Split(labels, ",")
|
||||||
|
discoveries = discoveryStorage.Filter(labelsFilterSlice)
|
||||||
} else {
|
} else {
|
||||||
discoveries = discoveryStorage.GetAll()
|
discoveries = discoveryStorage.GetAll()
|
||||||
}
|
}
|
||||||
@ -22,7 +24,9 @@ func listHandler(c echo.Context) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func prometheusHandler(c echo.Context) error {
|
func prometheusHandler(c echo.Context) error {
|
||||||
services := preparePrometheusOutput(discoveryStorage.GetAll())
|
name := c.Param("name")
|
||||||
|
|
||||||
|
services := preparePrometheusOutput(name, discoveryStorage.GetAll())
|
||||||
|
|
||||||
return c.JSONPretty(http.StatusOK, services, " ")
|
return c.JSONPretty(http.StatusOK, services, " ")
|
||||||
}
|
}
|
||||||
|
@ -141,7 +141,7 @@ func main() {
|
|||||||
// Routes
|
// Routes
|
||||||
e.GET("/", listHandler)
|
e.GET("/", listHandler)
|
||||||
e.GET("/v1/", listHandler)
|
e.GET("/v1/", listHandler)
|
||||||
e.GET("/v1/prometheus", prometheusHandler)
|
e.GET("/v1/prometheus/:name", prometheusHandler)
|
||||||
|
|
||||||
// e.GET("/template/:template", func(c echo.Context) error {
|
// e.GET("/template/:template", func(c echo.Context) error {
|
||||||
// templateName := c.Param("template")
|
// templateName := c.Param("template")
|
||||||
|
@ -29,17 +29,18 @@ type PrometheusService struct {
|
|||||||
// preparePrometheusOutput returns PrometheusServices which is struct compatible to what Prometheus expects
|
// preparePrometheusOutput returns PrometheusServices which is struct compatible to what Prometheus expects
|
||||||
// labels starting "ne:" will be used as NodeExporter labels. Label "ne:port:9123" will be used as port
|
// labels starting "prometheus:<name>:" will be used as Prometheus labels. Label "prometheus:<name>:port:9123" will be used as port
|
||||||
// used in the targets field. Same for "ne:host:1.2.3.4".
|
// used in the targets field. Same for "prometheus:<name>:host:1.2.3.4".
|
||||||
func preparePrometheusOutput(discoveries []server.Discovery) PrometheusServices {
|
func preparePrometheusOutput(name string, discoveries []server.Discovery) PrometheusServices {
|
||||||
services := PrometheusServices{}
|
services := PrometheusServices{}
|
||||||
|
|
||||||
for _, discovery := range discoveries {
|
for _, discovery := range discoveries {
|
||||||
port := strconv.Itoa(int(config.NodeExporterPort))
|
port := strconv.Itoa(int(config.NodeExporterPort))
|
||||||
host := discovery.Hostname
|
host := discovery.Hostname
|
||||||
|
var add bool // add to the prometheus output when there is at least one prometheus related label
|
||||||
|
|
||||||
labels := map[string]string{}
|
labels := map[string]string{}
|
||||||
|
|
||||||
for _, label := range discovery.FindLabels("ne") {
|
for _, label := range discovery.FindLabels("prometheus:" + name) {
|
||||||
trimmed := strings.TrimPrefix(label, "ne:")
|
trimmed := strings.TrimPrefix(label, "prometheus:"+name+":")
|
||||||
parts := strings.SplitN(trimmed, ":", 2)
|
parts := strings.SplitN(trimmed, ":", 2)
|
||||||
if len(parts) == 2 {
|
if len(parts) == 2 {
|
||||||
if parts[0] == "port" {
|
if parts[0] == "port" {
|
||||||
@ -49,15 +50,28 @@ func preparePrometheusOutput(discoveries []server.Discovery) PrometheusServices
|
|||||||
} else {
|
} else {
|
||||||
labels[parts[0]] = parts[1]
|
labels[parts[0]] = parts[1]
|
||||||
}
|
}
|
||||||
|
add = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
service := PrometheusService{
|
// This has to be checked here again because FindLabels adds : at the end of the label name.
|
||||||
Targets: []string{host + ":" + port},
|
if !add {
|
||||||
Labels: labels,
|
for _, label := range discovery.Labels {
|
||||||
|
if label == "prometheus:"+name {
|
||||||
|
add = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
services = append(services, service)
|
if add {
|
||||||
|
service := PrometheusService{
|
||||||
|
Targets: []string{host + ":" + port},
|
||||||
|
Labels: labels,
|
||||||
|
}
|
||||||
|
|
||||||
|
services = append(services, service)
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -147,14 +147,22 @@ func (d *Discoveries) GetAll() []Discovery {
|
|||||||
return d.activeServers
|
return d.activeServers
|
||||||
}
|
}
|
||||||
|
|
||||||
func (d *Discoveries) Filter(labelFilter string) []Discovery {
|
func (d *Discoveries) Filter(labelsFilter []string) []Discovery {
|
||||||
newSet := []Discovery{}
|
newSet := []Discovery{}
|
||||||
|
|
||||||
if len(labelFilter) > 0 {
|
var found bool
|
||||||
|
if len(labelsFilter) > 0 {
|
||||||
for _, discovery := range d.activeServers {
|
for _, discovery := range d.activeServers {
|
||||||
|
found = false
|
||||||
for _, label := range discovery.Labels {
|
for _, label := range discovery.Labels {
|
||||||
if label == labelFilter {
|
for _, labelFilter := range labelsFilter {
|
||||||
newSet = append(newSet, discovery)
|
if label == labelFilter {
|
||||||
|
newSet = append(newSet, discovery)
|
||||||
|
found = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if found {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user