fix(telemetry): use correct TopologyId field in integration test (#8714)
* fix(telemetry): use correct TopologyId field in integration test
  The proto field was renamed from cluster_id to topology_id but the integration test was not updated, causing a compilation error.
* ci: add telemetry integration test workflow
  Runs the telemetry integration test (server startup, protobuf marshaling, client send, metrics/stats/instances API checks) on changes to telemetry/ or weed/telemetry/.
* fix(telemetry): improve error message specificity in integration test
* fix(ci): pre-build telemetry server binary for integration test
  go run compiles the server on the fly, which exceeds the 15s startup timeout in CI. Build the binary first so the test starts instantly.
* fix(telemetry): fix ClusterId references in server and CI build path
  - Replace ClusterId with TopologyId in server storage and API handler (same rename as the integration test fix)
  - Fix CI build: telemetry server has its own go.mod, so build from within its directory
* ci(telemetry): add least-privilege permissions to workflow
  Scope the workflow token to read-only repository contents, matching the convention used in go.yml.
* fix(telemetry): set TopologyId in client integration test
  The client only populates TopologyId when SetTopologyId has been called. The test was missing this call, causing the server to reject the request with 400 (missing required field).
* fix(telemetry): delete clusterInfo metric on instance cleanup
  The cleanup loop removed all per-instance metrics except clusterInfo, leaking that label set after eviction.
This commit is contained in:
46
.github/workflows/telemetry-integration.yml
vendored
Normal file
46
.github/workflows/telemetry-integration.yml
vendored
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
name: Telemetry Integration Tests
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [ master ]
|
||||||
|
paths:
|
||||||
|
- 'telemetry/**'
|
||||||
|
- 'weed/telemetry/**'
|
||||||
|
- '.github/workflows/telemetry-integration.yml'
|
||||||
|
pull_request:
|
||||||
|
branches: [ master ]
|
||||||
|
paths:
|
||||||
|
- 'telemetry/**'
|
||||||
|
- 'weed/telemetry/**'
|
||||||
|
- '.github/workflows/telemetry-integration.yml'
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
telemetry-integration-test:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
timeout-minutes: 5
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Checkout code
|
||||||
|
uses: actions/checkout@v6
|
||||||
|
|
||||||
|
- name: Set up Go
|
||||||
|
uses: actions/setup-go@v6
|
||||||
|
with:
|
||||||
|
go-version-file: 'go.mod'
|
||||||
|
|
||||||
|
- name: Build telemetry server
|
||||||
|
run: cd telemetry/server && go build -o telemetry-server .
|
||||||
|
|
||||||
|
- name: Run telemetry integration test
|
||||||
|
run: go run telemetry/test/integration.go
|
||||||
|
|
||||||
|
- name: Upload test logs on failure
|
||||||
|
if: failure()
|
||||||
|
uses: actions/upload-artifact@v7
|
||||||
|
with:
|
||||||
|
name: telemetry-test-logs
|
||||||
|
path: telemetry-server-test.log
|
||||||
|
retention-days: 7
|
||||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -141,4 +141,5 @@ test/s3/iam/.test_env
|
|||||||
/test/erasure_coding/admin_dockertest/tmp
|
/test/erasure_coding/admin_dockertest/tmp
|
||||||
/test/erasure_coding/admin_dockertest/task_logs
|
/test/erasure_coding/admin_dockertest/task_logs
|
||||||
weed_bin
|
weed_bin
|
||||||
|
telemetry/server/telemetry-server
|
||||||
.aider*
|
.aider*
|
||||||
|
|||||||
@@ -54,7 +54,7 @@ func (h *Handler) CollectTelemetry(w http.ResponseWriter, r *http.Request) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Validate required fields
|
// Validate required fields
|
||||||
if data.ClusterId == "" || data.Version == "" || data.Os == "" {
|
if data.TopologyId == "" || data.Version == "" || data.Os == "" {
|
||||||
http.Error(w, "Missing required fields", http.StatusBadRequest)
|
http.Error(w, "Missing required fields", http.StatusBadRequest)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,8 +1,6 @@
|
|||||||
module github.com/seaweedfs/seaweedfs/telemetry/server
|
module github.com/seaweedfs/seaweedfs/telemetry/server
|
||||||
|
|
||||||
go 1.25
|
go 1.25.0
|
||||||
|
|
||||||
toolchain go1.25.0
|
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/prometheus/client_golang v1.23.2
|
github.com/prometheus/client_golang v1.23.2
|
||||||
@@ -15,10 +13,10 @@ require (
|
|||||||
github.com/cespare/xxhash/v2 v2.3.0 // indirect
|
github.com/cespare/xxhash/v2 v2.3.0 // indirect
|
||||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
|
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
|
||||||
github.com/prometheus/client_model v0.6.2 // indirect
|
github.com/prometheus/client_model v0.6.2 // indirect
|
||||||
github.com/prometheus/common v0.66.1 // indirect
|
github.com/prometheus/common v0.67.2 // indirect
|
||||||
github.com/prometheus/procfs v0.19.2 // indirect
|
github.com/prometheus/procfs v0.20.1 // indirect
|
||||||
go.yaml.in/yaml/v2 v2.4.2 // indirect
|
go.yaml.in/yaml/v2 v2.4.3 // indirect
|
||||||
golang.org/x/sys v0.39.0 // indirect
|
golang.org/x/sys v0.42.0 // indirect
|
||||||
)
|
)
|
||||||
|
|
||||||
replace github.com/seaweedfs/seaweedfs => ../..
|
replace github.com/seaweedfs/seaweedfs => ../..
|
||||||
|
|||||||
@@ -6,8 +6,8 @@ github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1
|
|||||||
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
|
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
|
||||||
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
|
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
|
||||||
github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
|
github.com/klauspost/compress v1.18.4 h1:RPhnKRAQ4Fh8zU2FY/6ZFDwTVTxgJ/EMydqSTzE9a2c=
|
||||||
github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
|
github.com/klauspost/compress v1.18.4/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4=
|
||||||
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
|
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
|
||||||
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
|
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
|
||||||
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
||||||
@@ -22,24 +22,21 @@ github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h
|
|||||||
github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg=
|
github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg=
|
||||||
github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk=
|
github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk=
|
||||||
github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
|
github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
|
||||||
github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs=
|
github.com/prometheus/common v0.67.2 h1:PcBAckGFTIHt2+L3I33uNRTlKTplNzFctXcWhPyAEN8=
|
||||||
github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA=
|
github.com/prometheus/common v0.67.2/go.mod h1:63W3KZb1JOKgcjlIr64WW/LvFGAqKPj0atm+knVGEko=
|
||||||
github.com/prometheus/procfs v0.17.0 h1:FuLQ+05u4ZI+SS/w9+BWEM2TXiHKsUQ9TADiRH7DuK0=
|
github.com/prometheus/procfs v0.20.1 h1:XwbrGOIplXW/AU3YhIhLODXMJYyC1isLFfYCsTEycfc=
|
||||||
github.com/prometheus/procfs v0.17.0/go.mod h1:oPQLaDAMRbA+u8H5Pbfq+dl3VDAvHxMUOVhe0wYB2zw=
|
github.com/prometheus/procfs v0.20.1/go.mod h1:o9EMBZGRyvDrSPH1RqdxhojkuXstoe4UlK79eF5TGGo=
|
||||||
github.com/prometheus/procfs v0.19.2/go.mod h1:M0aotyiemPhBCM0z5w87kL22CxfcH05ZpYlu+b4J7mw=
|
|
||||||
github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
|
github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
|
||||||
github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
|
github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
|
||||||
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
|
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
|
||||||
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
|
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
|
||||||
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
|
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
|
||||||
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
|
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
|
||||||
go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI=
|
go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0=
|
||||||
go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU=
|
go.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8=
|
||||||
golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k=
|
golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo=
|
||||||
golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
|
golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
|
||||||
golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
|
google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE=
|
||||||
google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc=
|
|
||||||
google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU=
|
|
||||||
google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
|
google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
|
||||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
|
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
|
||||||
|
|||||||
@@ -82,7 +82,7 @@ func (s *PrometheusStorage) StoreTelemetry(data *proto.TelemetryData) error {
|
|||||||
|
|
||||||
// Update Prometheus metrics
|
// Update Prometheus metrics
|
||||||
labels := prometheus.Labels{
|
labels := prometheus.Labels{
|
||||||
"cluster_id": data.ClusterId,
|
"cluster_id": data.TopologyId,
|
||||||
"version": data.Version,
|
"version": data.Version,
|
||||||
"os": data.Os,
|
"os": data.Os,
|
||||||
}
|
}
|
||||||
@@ -94,7 +94,7 @@ func (s *PrometheusStorage) StoreTelemetry(data *proto.TelemetryData) error {
|
|||||||
s.brokerCount.With(labels).Set(float64(data.BrokerCount))
|
s.brokerCount.With(labels).Set(float64(data.BrokerCount))
|
||||||
|
|
||||||
infoLabels := prometheus.Labels{
|
infoLabels := prometheus.Labels{
|
||||||
"cluster_id": data.ClusterId,
|
"cluster_id": data.TopologyId,
|
||||||
"version": data.Version,
|
"version": data.Version,
|
||||||
"os": data.Os,
|
"os": data.Os,
|
||||||
}
|
}
|
||||||
@@ -103,7 +103,7 @@ func (s *PrometheusStorage) StoreTelemetry(data *proto.TelemetryData) error {
|
|||||||
s.telemetryReceived.Inc()
|
s.telemetryReceived.Inc()
|
||||||
|
|
||||||
// Store in memory for API endpoints
|
// Store in memory for API endpoints
|
||||||
s.instances[data.ClusterId] = &telemetryData{
|
s.instances[data.TopologyId] = &telemetryData{
|
||||||
TelemetryData: data,
|
TelemetryData: data,
|
||||||
ReceivedAt: time.Now().UTC(),
|
ReceivedAt: time.Now().UTC(),
|
||||||
}
|
}
|
||||||
@@ -219,7 +219,7 @@ func (s *PrometheusStorage) CleanupOldInstances(maxAge time.Duration) {
|
|||||||
|
|
||||||
// Remove from Prometheus metrics
|
// Remove from Prometheus metrics
|
||||||
labels := prometheus.Labels{
|
labels := prometheus.Labels{
|
||||||
"cluster_id": instance.TelemetryData.ClusterId,
|
"cluster_id": instance.TelemetryData.TopologyId,
|
||||||
"version": instance.TelemetryData.Version,
|
"version": instance.TelemetryData.Version,
|
||||||
"os": instance.TelemetryData.Os,
|
"os": instance.TelemetryData.Os,
|
||||||
}
|
}
|
||||||
@@ -228,6 +228,7 @@ func (s *PrometheusStorage) CleanupOldInstances(maxAge time.Duration) {
|
|||||||
s.totalVolumeCount.Delete(labels)
|
s.totalVolumeCount.Delete(labels)
|
||||||
s.filerCount.Delete(labels)
|
s.filerCount.Delete(labels)
|
||||||
s.brokerCount.Delete(labels)
|
s.brokerCount.Delete(labels)
|
||||||
|
s.clusterInfo.Delete(labels)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -85,16 +85,25 @@ func startTelemetryServer() (*exec.Cmd, error) {
|
|||||||
return nil, fmt.Errorf("failed to get working directory: %v", err)
|
return nil, fmt.Errorf("failed to get working directory: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Navigate to the server directory (from main seaweedfs directory)
|
// Use pre-built binary if available (faster in CI), otherwise fall back to go run
|
||||||
serverDir := filepath.Join(testDir, "telemetry", "server")
|
args := []string{
|
||||||
|
"-port=" + serverPort,
|
||||||
cmd := exec.Command("go", "run", ".",
|
|
||||||
"-port="+serverPort,
|
|
||||||
"-dashboard=false",
|
"-dashboard=false",
|
||||||
"-cleanup=1m",
|
"-cleanup=1m",
|
||||||
"-max-age=1h")
|
"-max-age=1h",
|
||||||
|
}
|
||||||
|
|
||||||
|
serverBin := filepath.Join(testDir, "telemetry", "server", "telemetry-server")
|
||||||
|
var cmd *exec.Cmd
|
||||||
|
if _, err := os.Stat(serverBin); err == nil {
|
||||||
|
fmt.Printf("Using pre-built binary: %s\n", serverBin)
|
||||||
|
cmd = exec.Command(serverBin, args...)
|
||||||
|
} else {
|
||||||
|
fmt.Println("No pre-built binary found, using go run")
|
||||||
|
serverDir := filepath.Join(testDir, "telemetry", "server")
|
||||||
|
cmd = exec.Command("go", append([]string{"run", "."}, args...)...)
|
||||||
cmd.Dir = serverDir
|
cmd.Dir = serverDir
|
||||||
|
}
|
||||||
|
|
||||||
// Create log files for server output
|
// Create log files for server output
|
||||||
logFile, err := os.Create("telemetry-server-test.log")
|
logFile, err := os.Create("telemetry-server-test.log")
|
||||||
@@ -174,9 +183,9 @@ func testProtobufMarshaling() error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Verify data
|
// Verify data
|
||||||
if testData2.ClusterId != testData.ClusterId {
|
if testData2.TopologyId != testData.TopologyId {
|
||||||
return fmt.Errorf("protobuf data mismatch: expected %s, got %s",
|
return fmt.Errorf("TopologyId mismatch: expected %s, got %s",
|
||||||
testData.ClusterId, testData2.ClusterId)
|
testData.TopologyId, testData2.TopologyId)
|
||||||
}
|
}
|
||||||
|
|
||||||
if testData2.VolumeServerCount != testData.VolumeServerCount {
|
if testData2.VolumeServerCount != testData.VolumeServerCount {
|
||||||
@@ -190,6 +199,7 @@ func testProtobufMarshaling() error {
|
|||||||
func testTelemetryClient() error {
|
func testTelemetryClient() error {
|
||||||
// Create telemetry client
|
// Create telemetry client
|
||||||
client := telemetry.NewClient(serverURL+"/api/collect", true)
|
client := telemetry.NewClient(serverURL+"/api/collect", true)
|
||||||
|
client.SetTopologyId("test-topology-12345")
|
||||||
|
|
||||||
// Create test data using protobuf format
|
// Create test data using protobuf format
|
||||||
testData := &proto.TelemetryData{
|
testData := &proto.TelemetryData{
|
||||||
|
|||||||
Reference in New Issue
Block a user