fix(telemetry): use correct TopologyId field in integration test (#8714)

* fix(telemetry): use correct TopologyId field in integration test

The proto field was renamed from cluster_id to topology_id, but the
integration test was not updated, causing a compilation error.

* ci: add telemetry integration test workflow

Runs the telemetry integration test (server startup, protobuf
marshaling, client send, metrics/stats/instances API checks) on
changes to telemetry/ or weed/telemetry/.

* fix(telemetry): improve error message specificity in integration test

* fix(ci): pre-build telemetry server binary for integration test

go run compiles the server on the fly, and in CI that compilation takes
longer than the 15s startup timeout. Build the binary first so the
server starts instantly.

* fix(telemetry): fix ClusterId references in server and CI build path

- Replace ClusterId with TopologyId in server storage and API handler
  (same rename as the integration test fix)
- Fix CI build: telemetry server has its own go.mod, so build from
  within its directory

* ci(telemetry): add least-privilege permissions to workflow

Scope the workflow token to read-only repository contents, matching
the convention used in go.yml.

* fix(telemetry): set TopologyId in client integration test

The client only populates TopologyId when SetTopologyId has been
called. The test was missing this call, causing the server to reject
the request with 400 (missing required field).

* fix(telemetry): delete clusterInfo metric on instance cleanup

The cleanup loop removed all per-instance metrics except clusterInfo,
leaking that label set after eviction.
This commit is contained in:
Chris Lu
2026-03-20 22:15:05 -07:00
committed by GitHub
parent 002e325b74
commit ba855f9962
7 changed files with 89 additions and 36 deletions

View File

@@ -0,0 +1,46 @@
# Runs the telemetry integration test whenever the telemetry server, the
# telemetry client package, or this workflow file changes.
name: Telemetry Integration Tests

on:
  push:
    branches: [ master ]
    paths:
      - 'telemetry/**'
      - 'weed/telemetry/**'
      - '.github/workflows/telemetry-integration.yml'
  pull_request:
    branches: [ master ]
    paths:
      - 'telemetry/**'
      - 'weed/telemetry/**'
      - '.github/workflows/telemetry-integration.yml'

# Least privilege: the job only needs to read repository contents.
permissions:
  contents: read

jobs:
  telemetry-integration-test:
    runs-on: ubuntu-latest
    timeout-minutes: 5
    steps:
      - name: Checkout code
        uses: actions/checkout@v6

      - name: Set up Go
        uses: actions/setup-go@v6
        with:
          go-version-file: 'go.mod'

      # The telemetry server has its own go.mod, so it is built from inside
      # its directory. The integration test uses this pre-built binary when
      # it exists instead of compiling the server with `go run` at startup.
      - name: Build telemetry server
        run: cd telemetry/server && go build -o telemetry-server .

      - name: Run telemetry integration test
        run: go run telemetry/test/integration.go

      # The integration test writes the server output to
      # telemetry-server-test.log; keep it for debugging failed runs.
      - name: Upload test logs on failure
        if: failure()
        uses: actions/upload-artifact@v7
        with:
          name: telemetry-test-logs
          path: telemetry-server-test.log
          retention-days: 7

1
.gitignore vendored
View File

@@ -141,4 +141,5 @@ test/s3/iam/.test_env
/test/erasure_coding/admin_dockertest/tmp /test/erasure_coding/admin_dockertest/tmp
/test/erasure_coding/admin_dockertest/task_logs /test/erasure_coding/admin_dockertest/task_logs
weed_bin weed_bin
telemetry/server/telemetry-server
.aider* .aider*

View File

@@ -54,7 +54,7 @@ func (h *Handler) CollectTelemetry(w http.ResponseWriter, r *http.Request) {
} }
// Validate required fields // Validate required fields
if data.ClusterId == "" || data.Version == "" || data.Os == "" { if data.TopologyId == "" || data.Version == "" || data.Os == "" {
http.Error(w, "Missing required fields", http.StatusBadRequest) http.Error(w, "Missing required fields", http.StatusBadRequest)
return return
} }

View File

@@ -1,8 +1,6 @@
module github.com/seaweedfs/seaweedfs/telemetry/server module github.com/seaweedfs/seaweedfs/telemetry/server
go 1.25 go 1.25.0
toolchain go1.25.0
require ( require (
github.com/prometheus/client_golang v1.23.2 github.com/prometheus/client_golang v1.23.2
@@ -15,10 +13,10 @@ require (
github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/prometheus/client_model v0.6.2 // indirect github.com/prometheus/client_model v0.6.2 // indirect
github.com/prometheus/common v0.66.1 // indirect github.com/prometheus/common v0.67.2 // indirect
github.com/prometheus/procfs v0.19.2 // indirect github.com/prometheus/procfs v0.20.1 // indirect
go.yaml.in/yaml/v2 v2.4.2 // indirect go.yaml.in/yaml/v2 v2.4.3 // indirect
golang.org/x/sys v0.39.0 // indirect golang.org/x/sys v0.42.0 // indirect
) )
replace github.com/seaweedfs/seaweedfs => ../.. replace github.com/seaweedfs/seaweedfs => ../..

View File

@@ -6,8 +6,8 @@ github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= github.com/klauspost/compress v1.18.4 h1:RPhnKRAQ4Fh8zU2FY/6ZFDwTVTxgJ/EMydqSTzE9a2c=
github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= github.com/klauspost/compress v1.18.4/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
@@ -22,24 +22,21 @@ github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h
github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg= github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg=
github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk=
github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs= github.com/prometheus/common v0.67.2 h1:PcBAckGFTIHt2+L3I33uNRTlKTplNzFctXcWhPyAEN8=
github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA= github.com/prometheus/common v0.67.2/go.mod h1:63W3KZb1JOKgcjlIr64WW/LvFGAqKPj0atm+knVGEko=
github.com/prometheus/procfs v0.17.0 h1:FuLQ+05u4ZI+SS/w9+BWEM2TXiHKsUQ9TADiRH7DuK0= github.com/prometheus/procfs v0.20.1 h1:XwbrGOIplXW/AU3YhIhLODXMJYyC1isLFfYCsTEycfc=
github.com/prometheus/procfs v0.17.0/go.mod h1:oPQLaDAMRbA+u8H5Pbfq+dl3VDAvHxMUOVhe0wYB2zw= github.com/prometheus/procfs v0.20.1/go.mod h1:o9EMBZGRyvDrSPH1RqdxhojkuXstoe4UlK79eF5TGGo=
github.com/prometheus/procfs v0.19.2/go.mod h1:M0aotyiemPhBCM0z5w87kL22CxfcH05ZpYlu+b4J7mw=
github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI= go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0=
go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU= go.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8=
golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k= golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo=
golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE=
google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc=
google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU=
google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=

View File

@@ -82,7 +82,7 @@ func (s *PrometheusStorage) StoreTelemetry(data *proto.TelemetryData) error {
// Update Prometheus metrics // Update Prometheus metrics
labels := prometheus.Labels{ labels := prometheus.Labels{
"cluster_id": data.ClusterId, "cluster_id": data.TopologyId,
"version": data.Version, "version": data.Version,
"os": data.Os, "os": data.Os,
} }
@@ -94,7 +94,7 @@ func (s *PrometheusStorage) StoreTelemetry(data *proto.TelemetryData) error {
s.brokerCount.With(labels).Set(float64(data.BrokerCount)) s.brokerCount.With(labels).Set(float64(data.BrokerCount))
infoLabels := prometheus.Labels{ infoLabels := prometheus.Labels{
"cluster_id": data.ClusterId, "cluster_id": data.TopologyId,
"version": data.Version, "version": data.Version,
"os": data.Os, "os": data.Os,
} }
@@ -103,7 +103,7 @@ func (s *PrometheusStorage) StoreTelemetry(data *proto.TelemetryData) error {
s.telemetryReceived.Inc() s.telemetryReceived.Inc()
// Store in memory for API endpoints // Store in memory for API endpoints
s.instances[data.ClusterId] = &telemetryData{ s.instances[data.TopologyId] = &telemetryData{
TelemetryData: data, TelemetryData: data,
ReceivedAt: time.Now().UTC(), ReceivedAt: time.Now().UTC(),
} }
@@ -219,7 +219,7 @@ func (s *PrometheusStorage) CleanupOldInstances(maxAge time.Duration) {
// Remove from Prometheus metrics // Remove from Prometheus metrics
labels := prometheus.Labels{ labels := prometheus.Labels{
"cluster_id": instance.TelemetryData.ClusterId, "cluster_id": instance.TelemetryData.TopologyId,
"version": instance.TelemetryData.Version, "version": instance.TelemetryData.Version,
"os": instance.TelemetryData.Os, "os": instance.TelemetryData.Os,
} }
@@ -228,6 +228,7 @@ func (s *PrometheusStorage) CleanupOldInstances(maxAge time.Duration) {
s.totalVolumeCount.Delete(labels) s.totalVolumeCount.Delete(labels)
s.filerCount.Delete(labels) s.filerCount.Delete(labels)
s.brokerCount.Delete(labels) s.brokerCount.Delete(labels)
s.clusterInfo.Delete(labels)
} }
} }

View File

@@ -85,16 +85,25 @@ func startTelemetryServer() (*exec.Cmd, error) {
return nil, fmt.Errorf("failed to get working directory: %v", err) return nil, fmt.Errorf("failed to get working directory: %v", err)
} }
// Navigate to the server directory (from main seaweedfs directory) // Use pre-built binary if available (faster in CI), otherwise fall back to go run
serverDir := filepath.Join(testDir, "telemetry", "server") args := []string{
cmd := exec.Command("go", "run", ".",
"-port=" + serverPort, "-port=" + serverPort,
"-dashboard=false", "-dashboard=false",
"-cleanup=1m", "-cleanup=1m",
"-max-age=1h") "-max-age=1h",
}
serverBin := filepath.Join(testDir, "telemetry", "server", "telemetry-server")
var cmd *exec.Cmd
if _, err := os.Stat(serverBin); err == nil {
fmt.Printf("Using pre-built binary: %s\n", serverBin)
cmd = exec.Command(serverBin, args...)
} else {
fmt.Println("No pre-built binary found, using go run")
serverDir := filepath.Join(testDir, "telemetry", "server")
cmd = exec.Command("go", append([]string{"run", "."}, args...)...)
cmd.Dir = serverDir cmd.Dir = serverDir
}
// Create log files for server output // Create log files for server output
logFile, err := os.Create("telemetry-server-test.log") logFile, err := os.Create("telemetry-server-test.log")
@@ -174,9 +183,9 @@ func testProtobufMarshaling() error {
} }
// Verify data // Verify data
if testData2.ClusterId != testData.ClusterId { if testData2.TopologyId != testData.TopologyId {
return fmt.Errorf("protobuf data mismatch: expected %s, got %s", return fmt.Errorf("TopologyId mismatch: expected %s, got %s",
testData.ClusterId, testData2.ClusterId) testData.TopologyId, testData2.TopologyId)
} }
if testData2.VolumeServerCount != testData.VolumeServerCount { if testData2.VolumeServerCount != testData.VolumeServerCount {
@@ -190,6 +199,7 @@ func testProtobufMarshaling() error {
func testTelemetryClient() error { func testTelemetryClient() error {
// Create telemetry client // Create telemetry client
client := telemetry.NewClient(serverURL+"/api/collect", true) client := telemetry.NewClient(serverURL+"/api/collect", true)
client.SetTopologyId("test-topology-12345")
// Create test data using protobuf format // Create test data using protobuf format
testData := &proto.TelemetryData{ testData := &proto.TelemetryData{