Add Prometheus metric to count upload errors (#8788)

Add Prometheus metric to count upload errors (#8775)

Add the SeaweedFS_upload_error_total counter, labeled by HTTP status code,
so operators can alert on write/replication failures. A label value of "0"
indicates a transport error (no HTTP response was received).

Also add an "Upload Errors" panel to the Grafana dashboard.
This commit is contained in:
Chris Lu
2026-03-26 16:58:05 -07:00
committed by GitHub
parent 17028fbf59
commit 5fa5507234
3 changed files with 143 additions and 27 deletions

View File

@@ -11,6 +11,7 @@ import (
"net/http"
"net/textproto"
"path/filepath"
"strconv"
"strings"
"sync"
"time"
@@ -415,6 +416,7 @@ func (uploader *Uploader) upload_content(ctx context.Context, fillBufferFunction
}
}
if post_err != nil {
stats.UploadErrorCounter.WithLabelValues("0").Inc()
return nil, fmt.Errorf("upload %s %d bytes to %v: %v", option.Filename, originalDataSize, option.UploadUrl, post_err)
}
// print("-")
@@ -428,15 +430,18 @@ func (uploader *Uploader) upload_content(ctx context.Context, fillBufferFunction
resp_body, ra_err := io.ReadAll(resp.Body)
if ra_err != nil {
stats.UploadErrorCounter.WithLabelValues(strconv.Itoa(resp.StatusCode)).Inc()
return nil, fmt.Errorf("read response body %v: %w", option.UploadUrl, ra_err)
}
unmarshal_err := json.Unmarshal(resp_body, &ret)
if unmarshal_err != nil {
stats.UploadErrorCounter.WithLabelValues(strconv.Itoa(resp.StatusCode)).Inc()
glog.ErrorfCtx(ctx, "unmarshal %s: %v", option.UploadUrl, string(resp_body))
return nil, fmt.Errorf("unmarshal %v: %w", option.UploadUrl, unmarshal_err)
}
if ret.Error != "" {
stats.UploadErrorCounter.WithLabelValues(strconv.Itoa(resp.StatusCode)).Inc()
return nil, fmt.Errorf("unmarshalled error %v: %v", option.UploadUrl, ret.Error)
}
ret.ETag = etag