Add Prometheus metric to count upload errors (#8788)

Add Prometheus metric to count upload errors (#8775)

Add SeaweedFS_upload_error_total counter labeled by HTTP status code,
so operators can alert on write/replication failures. Code "0" indicates
a transport error (no HTTP response received).

Also add an "Upload Errors" panel to the Grafana dashboard.
This commit is contained in:
Chris Lu
2026-03-26 16:58:05 -07:00
committed by GitHub
parent 17028fbf59
commit 5fa5507234
3 changed files with 143 additions and 27 deletions

View File

@@ -458,6 +458,13 @@ var (
Name: "bucket_object_count",
Help: "Current number of objects in each S3 bucket (logical count, deduplicated across replicas).",
}, []string{"bucket"})
// UploadErrorCounter counts upload errors, partitioned by a single "code" label.
// Per its Help text, the label carries the HTTP status code of the failed
// upload, and the value "0" marks a transport error where no response was
// received. The full metric name is the package-level Namespace value joined
// with "upload_error_total".
UploadErrorCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: Namespace,
Name: "upload_error_total",
Help: "Counter of upload errors by HTTP status code. Code 0 means transport error (no response received).",
}, []string{"code"})
)
func init() {
@@ -519,6 +526,8 @@ func init() {
Gather.MustRegister(S3BucketPhysicalSizeBytesGauge)
Gather.MustRegister(S3BucketObjectCountGauge)
Gather.MustRegister(UploadErrorCounter)
go bucketMetricTTLControl()
}