add telemetry (#6926)
* add telemetry * fix go mod * add default telemetry server url * Update README.md * replace with broker count instead of s3 count * Update telemetry.pb.go * github action to deploy
This commit is contained in:
157
.github/workflows/deploy_telemetry.yml
vendored
Normal file
157
.github/workflows/deploy_telemetry.yml
vendored
Normal file
@@ -0,0 +1,157 @@
|
|||||||
|
# This workflow will build and deploy the SeaweedFS telemetry server
|
||||||
|
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-go
|
||||||
|
|
||||||
|
name: Deploy Telemetry Server
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [ "master" ]
|
||||||
|
paths:
|
||||||
|
- 'telemetry/**'
|
||||||
|
workflow_dispatch:
|
||||||
|
inputs:
|
||||||
|
setup:
|
||||||
|
description: 'Run first-time server setup'
|
||||||
|
required: true
|
||||||
|
type: boolean
|
||||||
|
default: false
|
||||||
|
deploy:
|
||||||
|
description: 'Deploy telemetry server to remote server'
|
||||||
|
required: true
|
||||||
|
type: boolean
|
||||||
|
default: false
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
deploy:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Set up Go
|
||||||
|
uses: actions/setup-go@v4
|
||||||
|
with:
|
||||||
|
go-version: '1.24'
|
||||||
|
|
||||||
|
- name: Build Telemetry Server
|
||||||
|
run: |
|
||||||
|
go mod tidy
|
||||||
|
cd telemetry/server
|
||||||
|
GOOS=linux GOARCH=amd64 go build -o telemetry-server main.go
|
||||||
|
|
||||||
|
- name: First-time Server Setup
|
||||||
|
if: github.event_name == 'workflow_dispatch' && inputs.setup
|
||||||
|
env:
|
||||||
|
SSH_PRIVATE_KEY: ${{ secrets.TELEMETRY_SSH_PRIVATE_KEY }}
|
||||||
|
REMOTE_HOST: ${{ secrets.TELEMETRY_HOST }}
|
||||||
|
REMOTE_USER: ${{ secrets.TELEMETRY_USER }}
|
||||||
|
run: |
|
||||||
|
mkdir -p ~/.ssh
|
||||||
|
echo "$SSH_PRIVATE_KEY" > ~/.ssh/deploy_key
|
||||||
|
chmod 600 ~/.ssh/deploy_key
|
||||||
|
echo "Host *" > ~/.ssh/config
|
||||||
|
echo " StrictHostKeyChecking no" >> ~/.ssh/config
|
||||||
|
|
||||||
|
# Create all required directories with proper permissions
|
||||||
|
ssh -i ~/.ssh/deploy_key $REMOTE_USER@$REMOTE_HOST "
|
||||||
|
mkdir -p ~/seaweedfs-telemetry/bin ~/seaweedfs-telemetry/logs ~/seaweedfs-telemetry/data ~/seaweedfs-telemetry/tmp && \
|
||||||
|
chmod 755 ~/seaweedfs-telemetry/logs && \
|
||||||
|
chmod 755 ~/seaweedfs-telemetry/data && \
|
||||||
|
touch ~/seaweedfs-telemetry/logs/telemetry.log ~/seaweedfs-telemetry/logs/telemetry.error.log && \
|
||||||
|
chmod 644 ~/seaweedfs-telemetry/logs/*.log"
|
||||||
|
|
||||||
|
# Create systemd service file
|
||||||
|
echo "
|
||||||
|
[Unit]
|
||||||
|
Description=SeaweedFS Telemetry Server
|
||||||
|
After=network.target
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Type=simple
|
||||||
|
User=$REMOTE_USER
|
||||||
|
WorkingDirectory=/home/$REMOTE_USER/seaweedfs-telemetry
|
||||||
|
ExecStart=/home/$REMOTE_USER/seaweedfs-telemetry/bin/telemetry-server -port=8353
|
||||||
|
Restart=always
|
||||||
|
RestartSec=5
|
||||||
|
StandardOutput=append:/home/$REMOTE_USER/seaweedfs-telemetry/logs/telemetry.log
|
||||||
|
StandardError=append:/home/$REMOTE_USER/seaweedfs-telemetry/logs/telemetry.error.log
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=multi-user.target" > telemetry.service
|
||||||
|
|
||||||
|
# Setup logrotate configuration
|
||||||
|
echo "# SeaweedFS Telemetry service log rotation
|
||||||
|
/home/$REMOTE_USER/seaweedfs-telemetry/logs/*.log {
|
||||||
|
daily
|
||||||
|
rotate 30
|
||||||
|
compress
|
||||||
|
delaycompress
|
||||||
|
missingok
|
||||||
|
notifempty
|
||||||
|
create 644 $REMOTE_USER $REMOTE_USER
|
||||||
|
postrotate
|
||||||
|
systemctl restart telemetry.service
|
||||||
|
endscript
|
||||||
|
}" > telemetry_logrotate
|
||||||
|
|
||||||
|
# Copy Grafana dashboard and Prometheus config
|
||||||
|
scp -i ~/.ssh/deploy_key telemetry/grafana-dashboard.json $REMOTE_USER@$REMOTE_HOST:~/seaweedfs-telemetry/
|
||||||
|
scp -i ~/.ssh/deploy_key telemetry/prometheus.yml $REMOTE_USER@$REMOTE_HOST:~/seaweedfs-telemetry/
|
||||||
|
|
||||||
|
# Copy and install service and logrotate files
|
||||||
|
scp -i ~/.ssh/deploy_key telemetry.service telemetry_logrotate $REMOTE_USER@$REMOTE_HOST:~/seaweedfs-telemetry/
|
||||||
|
ssh -i ~/.ssh/deploy_key $REMOTE_USER@$REMOTE_HOST "
|
||||||
|
sudo mv ~/seaweedfs-telemetry/telemetry.service /etc/systemd/system/ && \
|
||||||
|
sudo mv ~/seaweedfs-telemetry/telemetry_logrotate /etc/logrotate.d/seaweedfs-telemetry && \
|
||||||
|
sudo systemctl daemon-reload && \
|
||||||
|
sudo systemctl enable telemetry.service"
|
||||||
|
|
||||||
|
rm -f ~/.ssh/deploy_key
|
||||||
|
|
||||||
|
- name: Deploy Telemetry Server to Remote Server
|
||||||
|
if: (github.event_name == 'push' && contains(github.ref, 'refs/heads/master')) || (github.event_name == 'workflow_dispatch' && inputs.deploy)
|
||||||
|
env:
|
||||||
|
SSH_PRIVATE_KEY: ${{ secrets.TELEMETRY_SSH_PRIVATE_KEY }}
|
||||||
|
REMOTE_HOST: ${{ secrets.TELEMETRY_HOST }}
|
||||||
|
REMOTE_USER: ${{ secrets.TELEMETRY_USER }}
|
||||||
|
run: |
|
||||||
|
mkdir -p ~/.ssh
|
||||||
|
echo "$SSH_PRIVATE_KEY" > ~/.ssh/deploy_key
|
||||||
|
chmod 600 ~/.ssh/deploy_key
|
||||||
|
echo "Host *" > ~/.ssh/config
|
||||||
|
echo " StrictHostKeyChecking no" >> ~/.ssh/config
|
||||||
|
|
||||||
|
# Create temp directory and copy binary
|
||||||
|
ssh -i ~/.ssh/deploy_key $REMOTE_USER@$REMOTE_HOST "mkdir -p ~/seaweedfs-telemetry/tmp"
|
||||||
|
scp -i ~/.ssh/deploy_key telemetry/server/telemetry-server $REMOTE_USER@$REMOTE_HOST:~/seaweedfs-telemetry/tmp/
|
||||||
|
|
||||||
|
# Copy updated configuration files
|
||||||
|
scp -i ~/.ssh/deploy_key telemetry/grafana-dashboard.json $REMOTE_USER@$REMOTE_HOST:~/seaweedfs-telemetry/
|
||||||
|
scp -i ~/.ssh/deploy_key telemetry/prometheus.yml $REMOTE_USER@$REMOTE_HOST:~/seaweedfs-telemetry/
|
||||||
|
|
||||||
|
# Stop service, move binary, and restart
|
||||||
|
ssh -i ~/.ssh/deploy_key $REMOTE_USER@$REMOTE_HOST "
|
||||||
|
sudo systemctl stop telemetry.service || true && \
|
||||||
|
mkdir -p ~/seaweedfs-telemetry/bin && \
|
||||||
|
mv ~/seaweedfs-telemetry/tmp/telemetry-server ~/seaweedfs-telemetry/bin/ && \
|
||||||
|
chmod +x ~/seaweedfs-telemetry/bin/telemetry-server && \
|
||||||
|
sudo systemctl start telemetry.service && \
|
||||||
|
sudo systemctl status telemetry.service"
|
||||||
|
|
||||||
|
# Verify deployment
|
||||||
|
ssh -i ~/.ssh/deploy_key $REMOTE_USER@$REMOTE_HOST "
|
||||||
|
echo 'Waiting for service to start...'
|
||||||
|
sleep 5
|
||||||
|
curl -f http://localhost:8353/health || echo 'Health check failed'"
|
||||||
|
|
||||||
|
rm -f ~/.ssh/deploy_key
|
||||||
|
|
||||||
|
- name: Notify Deployment Status
|
||||||
|
if: always()
|
||||||
|
run: |
|
||||||
|
if [ "${{ job.status }}" == "success" ]; then
|
||||||
|
echo "✅ Telemetry server deployment successful"
|
||||||
|
echo "Dashboard: http://${{ secrets.TELEMETRY_HOST }}:8353"
|
||||||
|
echo "Metrics: http://${{ secrets.TELEMETRY_HOST }}:8353/metrics"
|
||||||
|
else
|
||||||
|
echo "❌ Telemetry server deployment failed"
|
||||||
|
fi
|
||||||
271
telemetry/DEPLOYMENT.md
Normal file
271
telemetry/DEPLOYMENT.md
Normal file
@@ -0,0 +1,271 @@
|
|||||||
|
# SeaweedFS Telemetry Server Deployment
|
||||||
|
|
||||||
|
This document describes how to deploy the SeaweedFS telemetry server to a remote server using GitHub Actions.
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
1. A remote Linux server with:
|
||||||
|
- SSH access
|
||||||
|
- systemd (for service management)
|
||||||
|
- Optional: Prometheus and Grafana (for monitoring)
|
||||||
|
|
||||||
|
2. GitHub repository secrets configured (see [Setup GitHub Secrets](#setup-github-secrets) below):
|
||||||
|
- `TELEMETRY_SSH_PRIVATE_KEY`: SSH private key for accessing the remote server
|
||||||
|
- `TELEMETRY_HOST`: Remote server hostname or IP address
|
||||||
|
- `TELEMETRY_USER`: Username for SSH access
|
||||||
|
|
||||||
|
## Setup GitHub Secrets
|
||||||
|
|
||||||
|
Before using the deployment workflow, you need to configure the required secrets in your GitHub repository.
|
||||||
|
|
||||||
|
### Step 1: Generate SSH Key Pair
|
||||||
|
|
||||||
|
On your local machine, generate a new SSH key pair specifically for deployment:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Generate a new SSH key pair
|
||||||
|
ssh-keygen -t ed25519 -C "seaweedfs-telemetry-deploy" -f ~/.ssh/seaweedfs_telemetry_deploy
|
||||||
|
|
||||||
|
# This creates two files:
|
||||||
|
# ~/.ssh/seaweedfs_telemetry_deploy (private key)
|
||||||
|
# ~/.ssh/seaweedfs_telemetry_deploy.pub (public key)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 2: Configure Remote Server
|
||||||
|
|
||||||
|
Copy the public key to your remote server:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Copy public key to remote server
|
||||||
|
ssh-copy-id -i ~/.ssh/seaweedfs_telemetry_deploy.pub user@your-server.com
|
||||||
|
|
||||||
|
# Or manually append to authorized_keys
|
||||||
|
cat ~/.ssh/seaweedfs_telemetry_deploy.pub | ssh user@your-server.com "mkdir -p ~/.ssh && cat >> ~/.ssh/authorized_keys"
|
||||||
|
```
|
||||||
|
|
||||||
|
Test the SSH connection:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Test SSH connection with the new key
|
||||||
|
ssh -i ~/.ssh/seaweedfs_telemetry_deploy user@your-server.com "echo 'SSH connection successful'"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 3: Add Secrets to GitHub Repository
|
||||||
|
|
||||||
|
1. Go to your GitHub repository
|
||||||
|
2. Click on **Settings** tab
|
||||||
|
3. In the sidebar, click **Secrets and variables** → **Actions**
|
||||||
|
4. Click **New repository secret** for each of the following:
|
||||||
|
|
||||||
|
#### TELEMETRY_SSH_PRIVATE_KEY
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Display the private key content
|
||||||
|
cat ~/.ssh/seaweedfs_telemetry_deploy
|
||||||
|
```
|
||||||
|
|
||||||
|
- **Name**: `TELEMETRY_SSH_PRIVATE_KEY`
|
||||||
|
- **Value**: Copy the entire private key content, including the `-----BEGIN OPENSSH PRIVATE KEY-----` and `-----END OPENSSH PRIVATE KEY-----` lines
|
||||||
|
|
||||||
|
#### TELEMETRY_HOST
|
||||||
|
|
||||||
|
- **Name**: `TELEMETRY_HOST`
|
||||||
|
- **Value**: Your server's hostname or IP address (e.g., `telemetry.example.com` or `192.168.1.100`)
|
||||||
|
|
||||||
|
#### TELEMETRY_USER
|
||||||
|
|
||||||
|
- **Name**: `TELEMETRY_USER`
|
||||||
|
- **Value**: The username on the remote server (e.g., `ubuntu`, `deploy`, or your username)
|
||||||
|
|
||||||
|
### Step 4: Verify Configuration
|
||||||
|
|
||||||
|
Create a simple test workflow or manually trigger the deployment to verify the secrets are working correctly.
|
||||||
|
|
||||||
|
### Security Best Practices
|
||||||
|
|
||||||
|
1. **Dedicated SSH Key**: Use a separate SSH key only for deployment
|
||||||
|
2. **Limited Permissions**: Create a dedicated user on the remote server with minimal required permissions
|
||||||
|
3. **Key Rotation**: Regularly rotate SSH keys
|
||||||
|
4. **Server Access**: Restrict SSH access to specific IP ranges if possible
|
||||||
|
|
||||||
|
### Example Server Setup
|
||||||
|
|
||||||
|
If you're setting up a new server, here's a basic configuration:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# On the remote server, create a dedicated user for deployment
|
||||||
|
sudo useradd -m -s /bin/bash seaweedfs-deploy
|
||||||
|
sudo usermod -aG sudo seaweedfs-deploy # Only if sudo access is needed
|
||||||
|
|
||||||
|
# Switch to the deployment user
|
||||||
|
sudo su - seaweedfs-deploy
|
||||||
|
|
||||||
|
# Create SSH directory
|
||||||
|
mkdir -p ~/.ssh
|
||||||
|
chmod 700 ~/.ssh
|
||||||
|
|
||||||
|
# Add your public key (paste the content of seaweedfs_telemetry_deploy.pub)
|
||||||
|
nano ~/.ssh/authorized_keys
|
||||||
|
chmod 600 ~/.ssh/authorized_keys
|
||||||
|
```
|
||||||
|
|
||||||
|
### Troubleshooting
|
||||||
|
|
||||||
|
#### SSH Connection Issues
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Test SSH connection manually
|
||||||
|
ssh -i ~/.ssh/seaweedfs_telemetry_deploy -v user@your-server.com
|
||||||
|
|
||||||
|
# Check SSH key permissions
|
||||||
|
ls -la ~/.ssh/seaweedfs_telemetry_deploy*
|
||||||
|
# Should show: -rw------- for private key, -rw-r--r-- for public key
|
||||||
|
```
|
||||||
|
|
||||||
|
#### GitHub Actions Fails
|
||||||
|
|
||||||
|
1. **Check secrets**: Ensure all three secrets are properly set in GitHub
|
||||||
|
2. **Verify SSH key**: Make sure the entire private key (including headers/footers) is copied
|
||||||
|
3. **Test connectivity**: Manually SSH to the server from your local machine
|
||||||
|
4. **Check user permissions**: Ensure the remote user has necessary permissions
|
||||||
|
|
||||||
|
## GitHub Actions Workflow
|
||||||
|
|
||||||
|
The deployment workflow (`.github/workflows/deploy_telemetry.yml`) provides two main operations:
|
||||||
|
|
||||||
|
### 1. First-time Setup
|
||||||
|
|
||||||
|
Run this once to set up the remote server:
|
||||||
|
|
||||||
|
1. Go to GitHub Actions in your repository
|
||||||
|
2. Select "Deploy Telemetry Server" workflow
|
||||||
|
3. Click "Run workflow"
|
||||||
|
4. Check "Run first-time server setup"
|
||||||
|
5. Click "Run workflow"
|
||||||
|
|
||||||
|
This will:
|
||||||
|
- Create necessary directories on the remote server
|
||||||
|
- Set up systemd service configuration
|
||||||
|
- Configure log rotation
|
||||||
|
- Upload Grafana dashboard and Prometheus configuration
|
||||||
|
|
||||||
|
|
||||||
|
### 2. Deploy Updates
|
||||||
|
|
||||||
|
Deployments happen automatically when:
|
||||||
|
- Code is pushed to the `master` branch with changes in the `telemetry/` directory
|
||||||
|
|
||||||
|
Or manually trigger deployment:
|
||||||
|
1. Go to GitHub Actions in your repository
|
||||||
|
2. Select "Deploy Telemetry Server" workflow
|
||||||
|
3. Click "Run workflow"
|
||||||
|
4. Check "Deploy telemetry server to remote server"
|
||||||
|
5. Click "Run workflow"
|
||||||
|
|
||||||
|
## Server Directory Structure
|
||||||
|
|
||||||
|
After setup, the remote server will have:
|
||||||
|
|
||||||
|
```
|
||||||
|
~/seaweedfs-telemetry/
|
||||||
|
├── bin/
|
||||||
|
│ └── telemetry-server # Binary executable
|
||||||
|
├── logs/
|
||||||
|
│ ├── telemetry.log # Application logs
|
||||||
|
│ └── telemetry.error.log # Error logs
|
||||||
|
├── data/ # Data directory (if needed)
|
||||||
|
├── grafana-dashboard.json # Grafana dashboard configuration
|
||||||
|
└── prometheus.yml # Prometheus configuration
|
||||||
|
```
|
||||||
|
|
||||||
|
## Service Management
|
||||||
|
|
||||||
|
The telemetry server runs as a systemd service:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Check service status
|
||||||
|
sudo systemctl status telemetry.service
|
||||||
|
|
||||||
|
# View logs
|
||||||
|
sudo journalctl -u telemetry.service -f
|
||||||
|
|
||||||
|
# Restart service
|
||||||
|
sudo systemctl restart telemetry.service
|
||||||
|
|
||||||
|
# Stop/start service
|
||||||
|
sudo systemctl stop telemetry.service
|
||||||
|
sudo systemctl start telemetry.service
|
||||||
|
```
|
||||||
|
|
||||||
|
## Accessing the Service
|
||||||
|
|
||||||
|
After deployment, the telemetry server will be available at:
|
||||||
|
|
||||||
|
- **Dashboard**: `http://your-server:8353`
|
||||||
|
- **API**: `http://your-server:8353/api/*`
|
||||||
|
- **Metrics**: `http://your-server:8353/metrics`
|
||||||
|
- **Health Check**: `http://your-server:8353/health`
|
||||||
|
|
||||||
|
## Optional: Prometheus and Grafana Integration
|
||||||
|
|
||||||
|
### Prometheus Setup
|
||||||
|
|
||||||
|
1. Install Prometheus on your server
|
||||||
|
2. Update `/etc/prometheus/prometheus.yml` to include:
|
||||||
|
```yaml
|
||||||
|
scrape_configs:
|
||||||
|
- job_name: 'seaweedfs-telemetry'
|
||||||
|
static_configs:
|
||||||
|
- targets: ['localhost:8353']
|
||||||
|
metrics_path: '/metrics'
|
||||||
|
```
|
||||||
|
|
||||||
|
### Grafana Setup
|
||||||
|
|
||||||
|
1. Install Grafana on your server
|
||||||
|
2. Import the dashboard from `~/seaweedfs-telemetry/grafana-dashboard.json`
|
||||||
|
3. Configure Prometheus as a data source pointing to your Prometheus instance
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### Deployment Fails
|
||||||
|
|
||||||
|
1. Check GitHub Actions logs for detailed error messages
|
||||||
|
2. Verify SSH connectivity: `ssh user@host`
|
||||||
|
3. Ensure all required secrets are configured in GitHub
|
||||||
|
|
||||||
|
### Service Won't Start
|
||||||
|
|
||||||
|
1. Check service logs: `sudo journalctl -u telemetry.service`
|
||||||
|
2. Verify binary permissions: `ls -la ~/seaweedfs-telemetry/bin/`
|
||||||
|
3. Test binary manually: `~/seaweedfs-telemetry/bin/telemetry-server -help`
|
||||||
|
|
||||||
|
### Port Conflicts
|
||||||
|
|
||||||
|
If port 8353 is already in use:
|
||||||
|
|
||||||
|
1. Edit the systemd service: `sudo systemctl edit telemetry.service`
|
||||||
|
2. Add override configuration:
|
||||||
|
```ini
|
||||||
|
[Service]
|
||||||
|
ExecStart=
|
||||||
|
ExecStart=/home/user/seaweedfs-telemetry/bin/telemetry-server -port=8354
|
||||||
|
```
|
||||||
|
3. Reload and restart: `sudo systemctl daemon-reload && sudo systemctl restart telemetry.service`
|
||||||
|
|
||||||
|
## Security Considerations
|
||||||
|
|
||||||
|
1. **Firewall**: Consider restricting access to telemetry ports
|
||||||
|
2. **SSH Keys**: Use dedicated SSH keys with minimal permissions
|
||||||
|
3. **User Permissions**: Run the service as a non-privileged user
|
||||||
|
4. **Network**: Consider running on internal networks only
|
||||||
|
|
||||||
|
## Monitoring
|
||||||
|
|
||||||
|
Monitor the deployment and service health:
|
||||||
|
|
||||||
|
- **GitHub Actions**: Check workflow runs for deployment status
|
||||||
|
- **System Logs**: `sudo journalctl -u telemetry.service`
|
||||||
|
- **Application Logs**: `tail -f ~/seaweedfs-telemetry/logs/telemetry.log`
|
||||||
|
- **Health Endpoint**: `curl http://localhost:8353/health`
|
||||||
|
- **Metrics**: `curl http://localhost:8353/metrics`
|
||||||
351
telemetry/README.md
Normal file
351
telemetry/README.md
Normal file
@@ -0,0 +1,351 @@
|
|||||||
|
# SeaweedFS Telemetry System
|
||||||
|
|
||||||
|
A privacy-respecting telemetry system for SeaweedFS that collects cluster-level usage statistics and provides visualization through Prometheus and Grafana.
|
||||||
|
|
||||||
|
## Features
|
||||||
|
|
||||||
|
- **Privacy-First Design**: Uses in-memory cluster IDs (regenerated on restart), no personal data collection
|
||||||
|
- **Prometheus Integration**: Native Prometheus metrics for monitoring and alerting
|
||||||
|
- **Grafana Dashboards**: Pre-built dashboards for data visualization
|
||||||
|
- **Protocol Buffers**: Efficient binary data transmission for optimal performance
|
||||||
|
- **Opt-in Only**: Disabled by default, requires explicit configuration
|
||||||
|
- **Docker Compose**: Complete monitoring stack deployment
|
||||||
|
- **Automatic Cleanup**: Configurable data retention policies
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
```
|
||||||
|
SeaweedFS Cluster → Telemetry Client → Telemetry Server → Prometheus → Grafana
|
||||||
|
(protobuf) (metrics) (queries)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Data Transmission
|
||||||
|
|
||||||
|
The telemetry system uses **Protocol Buffers exclusively** for efficient binary data transmission:
|
||||||
|
|
||||||
|
- **Compact Format**: 30-50% smaller than JSON
|
||||||
|
- **Fast Serialization**: Better performance than text-based formats
|
||||||
|
- **Type Safety**: Strong typing with generated Go structs
|
||||||
|
- **Schema Evolution**: Built-in versioning support
|
||||||
|
|
||||||
|
### Protobuf Schema
|
||||||
|
|
||||||
|
```protobuf
|
||||||
|
message TelemetryData {
|
||||||
|
string cluster_id = 1; // In-memory generated UUID
|
||||||
|
string version = 2; // SeaweedFS version
|
||||||
|
string os = 3; // Operating system
|
||||||
|
repeated string features = 4; // Enabled features
|
||||||
|
string deployment = 5; // Deployment type
|
||||||
|
int32 volume_server_count = 6; // Number of volume servers
|
||||||
|
uint64 total_disk_bytes = 7; // Total disk usage
|
||||||
|
int32 total_volume_count = 8; // Total volume count
|
||||||
|
int64 timestamp = 9; // Collection timestamp
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Privacy Approach
|
||||||
|
|
||||||
|
- **No Personal Data**: No hostnames, IP addresses, or user information
|
||||||
|
- **In-Memory IDs**: Cluster IDs are generated in-memory and change on restart
|
||||||
|
- **Aggregated Data**: Only cluster-level statistics, no individual file/user data
|
||||||
|
- **Opt-in Only**: Telemetry is disabled by default
|
||||||
|
- **Transparent**: Open source implementation, clear data collection policy
|
||||||
|
|
||||||
|
## Collected Data
|
||||||
|
|
||||||
|
| Field | Description | Example |
|
||||||
|
|-------|-------------|---------|
|
||||||
|
| `cluster_id` | In-memory UUID (changes on restart) | `a1b2c3d4-...` |
|
||||||
|
| `version` | SeaweedFS version | `3.45` |
|
||||||
|
| `os` | Operating system and architecture | `linux/amd64` |
|
||||||
|
| `features` | Enabled components | `["filer", "s3api"]` |
|
||||||
|
| `deployment` | Deployment type | `cluster` |
|
||||||
|
| `volume_server_count` | Number of volume servers | `5` |
|
||||||
|
| `total_disk_bytes` | Total disk usage across cluster | `1073741824` |
|
||||||
|
| `total_volume_count` | Total number of volumes | `120` |
|
||||||
|
| `timestamp` | When data was collected | `1640995200` |
|
||||||
|
|
||||||
|
## Quick Start
|
||||||
|
|
||||||
|
### 1. Deploy Telemetry Server
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Clone and start the complete monitoring stack
|
||||||
|
git clone https://github.com/seaweedfs/seaweedfs.git
|
||||||
|
cd seaweedfs/telemetry
|
||||||
|
docker-compose up -d
|
||||||
|
|
||||||
|
# Or run the server directly
|
||||||
|
cd server
|
||||||
|
go run . -port=8080 -dashboard=true
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Configure SeaweedFS
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Enable telemetry in SeaweedFS master (uses default telemetry.seaweedfs.com:3091)
|
||||||
|
weed master -telemetry=true
|
||||||
|
|
||||||
|
# Or in server mode
|
||||||
|
weed server -telemetry=true
|
||||||
|
|
||||||
|
# Or specify custom telemetry server
|
||||||
|
weed master -telemetry=true -telemetry.url=http://localhost:8080/api/collect
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Access Dashboards
|
||||||
|
|
||||||
|
- **Telemetry Server**: http://localhost:8080
|
||||||
|
- **Prometheus**: http://localhost:9090
|
||||||
|
- **Grafana**: http://localhost:3000 (admin/admin)
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
### SeaweedFS Master/Server
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Enable telemetry
|
||||||
|
-telemetry=true
|
||||||
|
|
||||||
|
# Set custom telemetry server URL (optional, defaults to telemetry.seaweedfs.com:3091)
|
||||||
|
-telemetry.url=http://your-telemetry-server:8080/api/collect
|
||||||
|
```
|
||||||
|
|
||||||
|
### Telemetry Server
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Server configuration
|
||||||
|
-port=8080 # Server port
|
||||||
|
-dashboard=true # Enable built-in dashboard
|
||||||
|
-cleanup=24h # Cleanup interval
|
||||||
|
-max-age=720h # Maximum data retention (30 days)
|
||||||
|
|
||||||
|
# Example
|
||||||
|
./telemetry-server -port=8080 -dashboard=true -cleanup=24h -max-age=720h
|
||||||
|
```
|
||||||
|
|
||||||
|
## Prometheus Metrics
|
||||||
|
|
||||||
|
The telemetry server exposes these Prometheus metrics:
|
||||||
|
|
||||||
|
### Cluster Metrics
|
||||||
|
- `seaweedfs_telemetry_total_clusters`: Total unique clusters (30 days)
|
||||||
|
- `seaweedfs_telemetry_active_clusters`: Active clusters (7 days)
|
||||||
|
|
||||||
|
### Per-Cluster Metrics
|
||||||
|
- `seaweedfs_telemetry_volume_servers{cluster_id, version, os, deployment}`: Volume servers per cluster
|
||||||
|
- `seaweedfs_telemetry_disk_bytes{cluster_id, version, os, deployment}`: Disk usage per cluster
|
||||||
|
- `seaweedfs_telemetry_volume_count{cluster_id, version, os, deployment}`: Volume count per cluster
|
||||||
|
- `seaweedfs_telemetry_filer_count{cluster_id, version, os, deployment}`: Filer servers per cluster
|
||||||
|
- `seaweedfs_telemetry_broker_count{cluster_id, version, os, deployment}`: Broker servers per cluster
|
||||||
|
- `seaweedfs_telemetry_cluster_info{cluster_id, version, os, deployment, features}`: Cluster metadata
|
||||||
|
|
||||||
|
### Server Metrics
|
||||||
|
- `seaweedfs_telemetry_reports_received_total`: Total telemetry reports received
|
||||||
|
|
||||||
|
## API Endpoints
|
||||||
|
|
||||||
|
### Data Collection
|
||||||
|
```bash
|
||||||
|
# Submit telemetry data (protobuf only)
|
||||||
|
POST /api/collect
|
||||||
|
Content-Type: application/x-protobuf
|
||||||
|
[TelemetryRequest protobuf data]
|
||||||
|
```
|
||||||
|
|
||||||
|
### Statistics (JSON for dashboard/debugging)
|
||||||
|
```bash
|
||||||
|
# Get aggregated statistics
|
||||||
|
GET /api/stats
|
||||||
|
|
||||||
|
# Get recent cluster instances
|
||||||
|
GET /api/instances?limit=100
|
||||||
|
|
||||||
|
# Get metrics over time
|
||||||
|
GET /api/metrics?days=30
|
||||||
|
```
|
||||||
|
|
||||||
|
### Monitoring
|
||||||
|
```bash
|
||||||
|
# Prometheus metrics
|
||||||
|
GET /metrics
|
||||||
|
```
|
||||||
|
|
||||||
|
## Docker Deployment
|
||||||
|
|
||||||
|
### Complete Stack (Recommended)
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# docker-compose.yml
|
||||||
|
version: '3.8'
|
||||||
|
services:
|
||||||
|
telemetry-server:
|
||||||
|
build: ./server
|
||||||
|
ports:
|
||||||
|
- "8080:8080"
|
||||||
|
command: ["-port=8080", "-dashboard=true", "-cleanup=24h"]
|
||||||
|
|
||||||
|
prometheus:
|
||||||
|
image: prom/prometheus:latest
|
||||||
|
ports:
|
||||||
|
- "9090:9090"
|
||||||
|
volumes:
|
||||||
|
- ./prometheus.yml:/etc/prometheus/prometheus.yml
|
||||||
|
|
||||||
|
grafana:
|
||||||
|
image: grafana/grafana:latest
|
||||||
|
ports:
|
||||||
|
- "3000:3000"
|
||||||
|
environment:
|
||||||
|
- GF_SECURITY_ADMIN_PASSWORD=admin
|
||||||
|
volumes:
|
||||||
|
- ./grafana-provisioning:/etc/grafana/provisioning
|
||||||
|
- ./grafana-dashboard.json:/var/lib/grafana/dashboards/seaweedfs.json
|
||||||
|
```
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Deploy the stack
|
||||||
|
docker-compose up -d
|
||||||
|
|
||||||
|
# Scale telemetry server if needed
|
||||||
|
docker-compose up -d --scale telemetry-server=3
|
||||||
|
```
|
||||||
|
|
||||||
|
### Server Only
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Build and run telemetry server
|
||||||
|
cd server
|
||||||
|
docker build -t seaweedfs-telemetry .
|
||||||
|
docker run -p 8080:8080 seaweedfs-telemetry -port=8080 -dashboard=true
|
||||||
|
```
|
||||||
|
|
||||||
|
## Development
|
||||||
|
|
||||||
|
### Protocol Buffer Development
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Generate protobuf code
|
||||||
|
cd telemetry
|
||||||
|
protoc --go_out=. --go_opt=paths=source_relative proto/telemetry.proto
|
||||||
|
|
||||||
|
# The generated code is already included in the repository
|
||||||
|
```
|
||||||
|
|
||||||
|
### Build from Source
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Build telemetry server
|
||||||
|
cd telemetry/server
|
||||||
|
go build -o telemetry-server .
|
||||||
|
|
||||||
|
# Build SeaweedFS with telemetry support
|
||||||
|
cd ../..
|
||||||
|
go build -o weed ./weed
|
||||||
|
```
|
||||||
|
|
||||||
|
### Testing
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Test telemetry server
|
||||||
|
cd telemetry/server
|
||||||
|
go test ./...
|
||||||
|
|
||||||
|
# Test protobuf communication (requires protobuf tools)
|
||||||
|
# See telemetry client code for examples
|
||||||
|
```
|
||||||
|
|
||||||
|
## Grafana Dashboard
|
||||||
|
|
||||||
|
The included Grafana dashboard provides:
|
||||||
|
|
||||||
|
- **Overview**: Total and active clusters, version distribution
|
||||||
|
- **Resource Usage**: Volume servers and disk usage over time
|
||||||
|
- **Deployments**: Deployment type and OS distribution
|
||||||
|
- **Growth Trends**: Historical growth patterns
|
||||||
|
|
||||||
|
### Custom Queries
|
||||||
|
|
||||||
|
```promql
|
||||||
|
# Total active clusters
|
||||||
|
seaweedfs_telemetry_active_clusters
|
||||||
|
|
||||||
|
# Disk usage by version
|
||||||
|
sum by (version) (seaweedfs_telemetry_disk_bytes)
|
||||||
|
|
||||||
|
# Volume servers by deployment type
|
||||||
|
sum by (deployment) (seaweedfs_telemetry_volume_servers)
|
||||||
|
|
||||||
|
# Filer servers by version
|
||||||
|
sum by (version) (seaweedfs_telemetry_filer_count)
|
||||||
|
|
||||||
|
# Broker servers across all clusters
|
||||||
|
sum(seaweedfs_telemetry_broker_count)
|
||||||
|
|
||||||
|
# Growth rate (weekly)
|
||||||
|
increase(seaweedfs_telemetry_total_clusters[7d])
|
||||||
|
```
|
||||||
|
|
||||||
|
## Security Considerations
|
||||||
|
|
||||||
|
- **Network Security**: Use HTTPS in production environments
|
||||||
|
- **Access Control**: Implement authentication for Grafana and Prometheus
|
||||||
|
- **Data Retention**: Configure appropriate retention policies
|
||||||
|
- **Monitoring**: Monitor the telemetry infrastructure itself
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### Common Issues
|
||||||
|
|
||||||
|
**SeaweedFS not sending data:**
|
||||||
|
```bash
|
||||||
|
# Check telemetry configuration
|
||||||
|
weed master -h | grep telemetry
|
||||||
|
|
||||||
|
# Verify connectivity
|
||||||
|
curl -v http://your-telemetry-server:8080/api/collect
|
||||||
|
```
|
||||||
|
|
||||||
|
**Server not receiving data:**
|
||||||
|
```bash
|
||||||
|
# Check server logs
|
||||||
|
docker-compose logs telemetry-server
|
||||||
|
|
||||||
|
# Verify metrics endpoint
|
||||||
|
curl http://localhost:8080/metrics
|
||||||
|
```
|
||||||
|
|
||||||
|
**Prometheus not scraping:**
|
||||||
|
```bash
|
||||||
|
# Check Prometheus targets
|
||||||
|
curl http://localhost:9090/api/v1/targets
|
||||||
|
|
||||||
|
# Verify configuration
|
||||||
|
docker-compose logs prometheus
|
||||||
|
```
|
||||||
|
|
||||||
|
### Debugging
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Enable verbose logging in SeaweedFS
|
||||||
|
weed master -v=2 -telemetry=true
|
||||||
|
|
||||||
|
# Check telemetry server metrics
|
||||||
|
curl http://localhost:8080/metrics | grep seaweedfs_telemetry
|
||||||
|
|
||||||
|
# Test data flow
|
||||||
|
curl http://localhost:8080/api/stats
|
||||||
|
```
|
||||||
|
|
||||||
|
## Contributing
|
||||||
|
|
||||||
|
1. Fork the repository
|
||||||
|
2. Create a feature branch
|
||||||
|
3. Make your changes
|
||||||
|
4. Add tests if applicable
|
||||||
|
5. Submit a pull request
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
This telemetry system is part of SeaweedFS and follows the same Apache 2.0 license.
|
||||||
55
telemetry/docker-compose.yml
Normal file
55
telemetry/docker-compose.yml
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
version: '3.8'
|
||||||
|
|
||||||
|
services:
|
||||||
|
telemetry-server:
|
||||||
|
build: ./server
|
||||||
|
ports:
|
||||||
|
- "8080:8080"
|
||||||
|
command: [
|
||||||
|
"./telemetry-server",
|
||||||
|
"-port=8080",
|
||||||
|
"-dashboard=false", # Disable built-in dashboard, use Grafana
|
||||||
|
"-log=true",
|
||||||
|
"-cors=true"
|
||||||
|
]
|
||||||
|
networks:
|
||||||
|
- telemetry
|
||||||
|
|
||||||
|
prometheus:
|
||||||
|
image: prom/prometheus:latest
|
||||||
|
ports:
|
||||||
|
- "9090:9090"
|
||||||
|
volumes:
|
||||||
|
- ./prometheus.yml:/etc/prometheus/prometheus.yml
|
||||||
|
- prometheus_data:/prometheus
|
||||||
|
command:
|
||||||
|
- '--config.file=/etc/prometheus/prometheus.yml'
|
||||||
|
- '--storage.tsdb.path=/prometheus'
|
||||||
|
- '--web.console.libraries=/etc/prometheus/console_libraries'
|
||||||
|
- '--web.console.templates=/etc/prometheus/consoles'
|
||||||
|
- '--storage.tsdb.retention.time=200h'
|
||||||
|
- '--web.enable-lifecycle'
|
||||||
|
networks:
|
||||||
|
- telemetry
|
||||||
|
|
||||||
|
grafana:
|
||||||
|
image: grafana/grafana:latest
|
||||||
|
ports:
|
||||||
|
- "3000:3000"
|
||||||
|
environment:
|
||||||
|
- GF_SECURITY_ADMIN_PASSWORD=admin
|
||||||
|
- GF_USERS_ALLOW_SIGN_UP=false
|
||||||
|
volumes:
|
||||||
|
- grafana_data:/var/lib/grafana
|
||||||
|
- ./grafana-dashboard.json:/var/lib/grafana/dashboards/seaweedfs-telemetry.json
|
||||||
|
- ./grafana-provisioning:/etc/grafana/provisioning
|
||||||
|
networks:
|
||||||
|
- telemetry
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
prometheus_data:
|
||||||
|
grafana_data:
|
||||||
|
|
||||||
|
networks:
|
||||||
|
telemetry:
|
||||||
|
driver: bridge
|
||||||
734
telemetry/grafana-dashboard.json
Normal file
734
telemetry/grafana-dashboard.json
Normal file
@@ -0,0 +1,734 @@
|
|||||||
|
{
|
||||||
|
"annotations": {
|
||||||
|
"list": [
|
||||||
|
{
|
||||||
|
"builtIn": 1,
|
||||||
|
"datasource": {
|
||||||
|
"type": "grafana",
|
||||||
|
"uid": "-- Grafana --"
|
||||||
|
},
|
||||||
|
"enable": true,
|
||||||
|
"hide": true,
|
||||||
|
"iconColor": "rgba(0, 211, 255, 1)",
|
||||||
|
"name": "Annotations & Alerts",
|
||||||
|
"type": "dashboard"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"editable": true,
|
||||||
|
"fiscalYearStartMonth": 0,
|
||||||
|
"graphTooltip": 0,
|
||||||
|
"id": null,
|
||||||
|
"links": [],
|
||||||
|
"liveNow": false,
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "${DS_PROMETHEUS}"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "thresholds"
|
||||||
|
},
|
||||||
|
"custom": {
|
||||||
|
"align": "auto",
|
||||||
|
"cellOptions": {
|
||||||
|
"type": "auto"
|
||||||
|
},
|
||||||
|
"inspect": false
|
||||||
|
},
|
||||||
|
"mappings": [],
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{
|
||||||
|
"color": "green",
|
||||||
|
"value": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"color": "red",
|
||||||
|
"value": 80
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 0,
|
||||||
|
"y": 0
|
||||||
|
},
|
||||||
|
"id": 1,
|
||||||
|
"options": {
|
||||||
|
"showHeader": true
|
||||||
|
},
|
||||||
|
"pluginVersion": "10.0.0",
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "${DS_PROMETHEUS}"
|
||||||
|
},
|
||||||
|
"expr": "seaweedfs_telemetry_total_clusters",
|
||||||
|
"format": "time_series",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Total SeaweedFS Clusters",
|
||||||
|
"type": "stat"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "${DS_PROMETHEUS}"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "thresholds"
|
||||||
|
},
|
||||||
|
"custom": {
|
||||||
|
"align": "auto",
|
||||||
|
"cellOptions": {
|
||||||
|
"type": "auto"
|
||||||
|
},
|
||||||
|
"inspect": false
|
||||||
|
},
|
||||||
|
"mappings": [],
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{
|
||||||
|
"color": "green",
|
||||||
|
"value": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"color": "red",
|
||||||
|
"value": 80
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 12,
|
||||||
|
"y": 0
|
||||||
|
},
|
||||||
|
"id": 2,
|
||||||
|
"options": {
|
||||||
|
"showHeader": true
|
||||||
|
},
|
||||||
|
"pluginVersion": "10.0.0",
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "${DS_PROMETHEUS}"
|
||||||
|
},
|
||||||
|
"expr": "seaweedfs_telemetry_active_clusters",
|
||||||
|
"format": "time_series",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Active Clusters (7 days)",
|
||||||
|
"type": "stat"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "${DS_PROMETHEUS}"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
|
"custom": {
|
||||||
|
"hideFrom": {
|
||||||
|
"legend": false,
|
||||||
|
"tooltip": false,
|
||||||
|
"vis": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"mappings": []
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 0,
|
||||||
|
"y": 8
|
||||||
|
},
|
||||||
|
"id": 3,
|
||||||
|
"options": {
|
||||||
|
"legend": {
|
||||||
|
"displayMode": "visible",
|
||||||
|
"placement": "bottom",
|
||||||
|
"showLegend": true
|
||||||
|
},
|
||||||
|
"pieType": "pie",
|
||||||
|
"reduceOptions": {
|
||||||
|
"values": false,
|
||||||
|
"calcs": [
|
||||||
|
"lastNotNull"
|
||||||
|
],
|
||||||
|
"fields": ""
|
||||||
|
},
|
||||||
|
"tooltip": {
|
||||||
|
"mode": "single",
|
||||||
|
"sort": "none"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "${DS_PROMETHEUS}"
|
||||||
|
},
|
||||||
|
"expr": "count by (version) (seaweedfs_telemetry_cluster_info)",
|
||||||
|
"format": "time_series",
|
||||||
|
"legendFormat": "{{version}}",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "SeaweedFS Version Distribution",
|
||||||
|
"type": "piechart"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "${DS_PROMETHEUS}"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
|
"custom": {
|
||||||
|
"hideFrom": {
|
||||||
|
"legend": false,
|
||||||
|
"tooltip": false,
|
||||||
|
"vis": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"mappings": []
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 12,
|
||||||
|
"y": 8
|
||||||
|
},
|
||||||
|
"id": 4,
|
||||||
|
"options": {
|
||||||
|
"legend": {
|
||||||
|
"displayMode": "visible",
|
||||||
|
"placement": "bottom",
|
||||||
|
"showLegend": true
|
||||||
|
},
|
||||||
|
"pieType": "pie",
|
||||||
|
"reduceOptions": {
|
||||||
|
"values": false,
|
||||||
|
"calcs": [
|
||||||
|
"lastNotNull"
|
||||||
|
],
|
||||||
|
"fields": ""
|
||||||
|
},
|
||||||
|
"tooltip": {
|
||||||
|
"mode": "single",
|
||||||
|
"sort": "none"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "${DS_PROMETHEUS}"
|
||||||
|
},
|
||||||
|
"expr": "count by (os) (seaweedfs_telemetry_cluster_info)",
|
||||||
|
"format": "time_series",
|
||||||
|
"legendFormat": "{{os}}",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Operating System Distribution",
|
||||||
|
"type": "piechart"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "${DS_PROMETHEUS}"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
|
"custom": {
|
||||||
|
"axisLabel": "",
|
||||||
|
"axisPlacement": "auto",
|
||||||
|
"barAlignment": 0,
|
||||||
|
"drawStyle": "line",
|
||||||
|
"fillOpacity": 0,
|
||||||
|
"gradientMode": "none",
|
||||||
|
"hideFrom": {
|
||||||
|
"legend": false,
|
||||||
|
"tooltip": false,
|
||||||
|
"vis": false
|
||||||
|
},
|
||||||
|
"lineInterpolation": "linear",
|
||||||
|
"lineWidth": 1,
|
||||||
|
"pointSize": 5,
|
||||||
|
"scaleDistribution": {
|
||||||
|
"type": "linear"
|
||||||
|
},
|
||||||
|
"showPoints": "auto",
|
||||||
|
"spanNulls": false,
|
||||||
|
"stacking": {
|
||||||
|
"group": "A",
|
||||||
|
"mode": "none"
|
||||||
|
},
|
||||||
|
"thresholdsStyle": {
|
||||||
|
"mode": "off"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"mappings": [],
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{
|
||||||
|
"color": "green",
|
||||||
|
"value": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"color": "red",
|
||||||
|
"value": 80
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 24,
|
||||||
|
"x": 0,
|
||||||
|
"y": 16
|
||||||
|
},
|
||||||
|
"id": 5,
|
||||||
|
"options": {
|
||||||
|
"legend": {
|
||||||
|
"calcs": [],
|
||||||
|
"displayMode": "list",
|
||||||
|
"placement": "bottom",
|
||||||
|
"showLegend": true
|
||||||
|
},
|
||||||
|
"tooltip": {
|
||||||
|
"mode": "single",
|
||||||
|
"sort": "none"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "${DS_PROMETHEUS}"
|
||||||
|
},
|
||||||
|
"expr": "sum(seaweedfs_telemetry_volume_servers)",
|
||||||
|
"format": "time_series",
|
||||||
|
"legendFormat": "Total Volume Servers",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Total Volume Servers Over Time",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "${DS_PROMETHEUS}"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
|
"custom": {
|
||||||
|
"axisLabel": "",
|
||||||
|
"axisPlacement": "auto",
|
||||||
|
"barAlignment": 0,
|
||||||
|
"drawStyle": "line",
|
||||||
|
"fillOpacity": 0,
|
||||||
|
"gradientMode": "none",
|
||||||
|
"hideFrom": {
|
||||||
|
"legend": false,
|
||||||
|
"tooltip": false,
|
||||||
|
"vis": false
|
||||||
|
},
|
||||||
|
"lineInterpolation": "linear",
|
||||||
|
"lineWidth": 1,
|
||||||
|
"pointSize": 5,
|
||||||
|
"scaleDistribution": {
|
||||||
|
"type": "linear"
|
||||||
|
},
|
||||||
|
"showPoints": "auto",
|
||||||
|
"spanNulls": false,
|
||||||
|
"stacking": {
|
||||||
|
"group": "A",
|
||||||
|
"mode": "none"
|
||||||
|
},
|
||||||
|
"thresholdsStyle": {
|
||||||
|
"mode": "off"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"mappings": [],
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{
|
||||||
|
"color": "green",
|
||||||
|
"value": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"color": "red",
|
||||||
|
"value": 80
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"unit": "bytes"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 0,
|
||||||
|
"y": 24
|
||||||
|
},
|
||||||
|
"id": 6,
|
||||||
|
"options": {
|
||||||
|
"legend": {
|
||||||
|
"calcs": [],
|
||||||
|
"displayMode": "list",
|
||||||
|
"placement": "bottom",
|
||||||
|
"showLegend": true
|
||||||
|
},
|
||||||
|
"tooltip": {
|
||||||
|
"mode": "single",
|
||||||
|
"sort": "none"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "${DS_PROMETHEUS}"
|
||||||
|
},
|
||||||
|
"expr": "sum(seaweedfs_telemetry_disk_bytes)",
|
||||||
|
"format": "time_series",
|
||||||
|
"legendFormat": "Total Disk Usage",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Total Disk Usage Over Time",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "${DS_PROMETHEUS}"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
|
"custom": {
|
||||||
|
"axisLabel": "",
|
||||||
|
"axisPlacement": "auto",
|
||||||
|
"barAlignment": 0,
|
||||||
|
"drawStyle": "line",
|
||||||
|
"fillOpacity": 0,
|
||||||
|
"gradientMode": "none",
|
||||||
|
"hideFrom": {
|
||||||
|
"legend": false,
|
||||||
|
"tooltip": false,
|
||||||
|
"vis": false
|
||||||
|
},
|
||||||
|
"lineInterpolation": "linear",
|
||||||
|
"lineWidth": 1,
|
||||||
|
"pointSize": 5,
|
||||||
|
"scaleDistribution": {
|
||||||
|
"type": "linear"
|
||||||
|
},
|
||||||
|
"showPoints": "auto",
|
||||||
|
"spanNulls": false,
|
||||||
|
"stacking": {
|
||||||
|
"group": "A",
|
||||||
|
"mode": "none"
|
||||||
|
},
|
||||||
|
"thresholdsStyle": {
|
||||||
|
"mode": "off"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"mappings": [],
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{
|
||||||
|
"color": "green",
|
||||||
|
"value": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"color": "red",
|
||||||
|
"value": 80
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 12,
|
||||||
|
"y": 24
|
||||||
|
},
|
||||||
|
"id": 7,
|
||||||
|
"options": {
|
||||||
|
"legend": {
|
||||||
|
"calcs": [],
|
||||||
|
"displayMode": "list",
|
||||||
|
"placement": "bottom",
|
||||||
|
"showLegend": true
|
||||||
|
},
|
||||||
|
"tooltip": {
|
||||||
|
"mode": "single",
|
||||||
|
"sort": "none"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "${DS_PROMETHEUS}"
|
||||||
|
},
|
||||||
|
"expr": "sum(seaweedfs_telemetry_volume_count)",
|
||||||
|
"format": "time_series",
|
||||||
|
"legendFormat": "Total Volume Count",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Total Volume Count Over Time",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "${DS_PROMETHEUS}"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
|
"custom": {
|
||||||
|
"axisLabel": "",
|
||||||
|
"axisPlacement": "auto",
|
||||||
|
"barAlignment": 0,
|
||||||
|
"drawStyle": "line",
|
||||||
|
"fillOpacity": 0,
|
||||||
|
"gradientMode": "none",
|
||||||
|
"hideFrom": {
|
||||||
|
"legend": false,
|
||||||
|
"tooltip": false,
|
||||||
|
"vis": false
|
||||||
|
},
|
||||||
|
"lineInterpolation": "linear",
|
||||||
|
"lineWidth": 1,
|
||||||
|
"pointSize": 5,
|
||||||
|
"scaleDistribution": {
|
||||||
|
"type": "linear"
|
||||||
|
},
|
||||||
|
"showPoints": "auto",
|
||||||
|
"spanNulls": false,
|
||||||
|
"stacking": {
|
||||||
|
"group": "A",
|
||||||
|
"mode": "none"
|
||||||
|
},
|
||||||
|
"thresholdsStyle": {
|
||||||
|
"mode": "off"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"mappings": [],
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{
|
||||||
|
"color": "green",
|
||||||
|
"value": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"color": "red",
|
||||||
|
"value": 80
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 0,
|
||||||
|
"y": 32
|
||||||
|
},
|
||||||
|
"id": 8,
|
||||||
|
"options": {
|
||||||
|
"legend": {
|
||||||
|
"calcs": [],
|
||||||
|
"displayMode": "list",
|
||||||
|
"placement": "bottom",
|
||||||
|
"showLegend": true
|
||||||
|
},
|
||||||
|
"tooltip": {
|
||||||
|
"mode": "single",
|
||||||
|
"sort": "none"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "${DS_PROMETHEUS}"
|
||||||
|
},
|
||||||
|
"expr": "sum(seaweedfs_telemetry_filer_count)",
|
||||||
|
"format": "time_series",
|
||||||
|
"legendFormat": "Total Filer Count",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Total Filer Servers Over Time",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "${DS_PROMETHEUS}"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
|
"custom": {
|
||||||
|
"axisLabel": "",
|
||||||
|
"axisPlacement": "auto",
|
||||||
|
"barAlignment": 0,
|
||||||
|
"drawStyle": "line",
|
||||||
|
"fillOpacity": 0,
|
||||||
|
"gradientMode": "none",
|
||||||
|
"hideFrom": {
|
||||||
|
"legend": false,
|
||||||
|
"tooltip": false,
|
||||||
|
"vis": false
|
||||||
|
},
|
||||||
|
"lineInterpolation": "linear",
|
||||||
|
"lineWidth": 1,
|
||||||
|
"pointSize": 5,
|
||||||
|
"scaleDistribution": {
|
||||||
|
"type": "linear"
|
||||||
|
},
|
||||||
|
"showPoints": "auto",
|
||||||
|
"spanNulls": false,
|
||||||
|
"stacking": {
|
||||||
|
"group": "A",
|
||||||
|
"mode": "none"
|
||||||
|
},
|
||||||
|
"thresholdsStyle": {
|
||||||
|
"mode": "off"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"mappings": [],
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{
|
||||||
|
"color": "green",
|
||||||
|
"value": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"color": "red",
|
||||||
|
"value": 80
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 12,
|
||||||
|
"y": 32
|
||||||
|
},
|
||||||
|
"id": 9,
|
||||||
|
"options": {
|
||||||
|
"legend": {
|
||||||
|
"calcs": [],
|
||||||
|
"displayMode": "list",
|
||||||
|
"placement": "bottom",
|
||||||
|
"showLegend": true
|
||||||
|
},
|
||||||
|
"tooltip": {
|
||||||
|
"mode": "single",
|
||||||
|
"sort": "none"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "${DS_PROMETHEUS}"
|
||||||
|
},
|
||||||
|
"expr": "sum(seaweedfs_telemetry_broker_count)",
|
||||||
|
"format": "time_series",
|
||||||
|
"legendFormat": "Total Broker Count",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Total Broker Servers Over Time",
|
||||||
|
"type": "timeseries"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"refresh": "5m",
|
||||||
|
"schemaVersion": 38,
|
||||||
|
"style": "dark",
|
||||||
|
"tags": [
|
||||||
|
"seaweedfs",
|
||||||
|
"telemetry"
|
||||||
|
],
|
||||||
|
"templating": {
|
||||||
|
"list": []
|
||||||
|
},
|
||||||
|
"time": {
|
||||||
|
"from": "now-24h",
|
||||||
|
"to": "now"
|
||||||
|
},
|
||||||
|
"timepicker": {},
|
||||||
|
"timezone": "",
|
||||||
|
"title": "SeaweedFS Telemetry Dashboard",
|
||||||
|
"uid": "seaweedfs-telemetry",
|
||||||
|
"version": 1,
|
||||||
|
"weekStart": ""
|
||||||
|
}
|
||||||
12
telemetry/grafana-provisioning/dashboards/dashboards.yml
Normal file
12
telemetry/grafana-provisioning/dashboards/dashboards.yml
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
apiVersion: 1
|
||||||
|
|
||||||
|
providers:
|
||||||
|
- name: 'seaweedfs'
|
||||||
|
orgId: 1
|
||||||
|
folder: ''
|
||||||
|
type: file
|
||||||
|
disableDeletion: false
|
||||||
|
updateIntervalSeconds: 10
|
||||||
|
allowUiUpdates: true
|
||||||
|
options:
|
||||||
|
path: /var/lib/grafana/dashboards
|
||||||
@@ -0,0 +1,9 @@
|
|||||||
|
apiVersion: 1
|
||||||
|
|
||||||
|
datasources:
|
||||||
|
- name: Prometheus
|
||||||
|
type: prometheus
|
||||||
|
access: proxy
|
||||||
|
url: http://prometheus:9090
|
||||||
|
isDefault: true
|
||||||
|
editable: true
|
||||||
15
telemetry/prometheus.yml
Normal file
15
telemetry/prometheus.yml
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
global:
|
||||||
|
scrape_interval: 15s
|
||||||
|
evaluation_interval: 15s
|
||||||
|
|
||||||
|
rule_files:
|
||||||
|
# - "first_rules.yml"
|
||||||
|
# - "second_rules.yml"
|
||||||
|
|
||||||
|
scrape_configs:
|
||||||
|
- job_name: 'seaweedfs-telemetry'
|
||||||
|
static_configs:
|
||||||
|
- targets: ['telemetry-server:8080']
|
||||||
|
scrape_interval: 30s
|
||||||
|
metrics_path: '/metrics'
|
||||||
|
scrape_timeout: 10s
|
||||||
398
telemetry/proto/telemetry.pb.go
Normal file
398
telemetry/proto/telemetry.pb.go
Normal file
@@ -0,0 +1,398 @@
|
|||||||
|
// Code generated by protoc-gen-go. DO NOT EDIT.
|
||||||
|
// versions:
|
||||||
|
// protoc-gen-go v1.34.2
|
||||||
|
// protoc v5.29.3
|
||||||
|
// source: proto/telemetry.proto
|
||||||
|
|
||||||
|
package proto
|
||||||
|
|
||||||
|
import (
|
||||||
|
protoreflect "google.golang.org/protobuf/reflect/protoreflect"
|
||||||
|
protoimpl "google.golang.org/protobuf/runtime/protoimpl"
|
||||||
|
reflect "reflect"
|
||||||
|
sync "sync"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
// Verify that this generated code is sufficiently up-to-date.
|
||||||
|
_ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion)
|
||||||
|
// Verify that runtime/protoimpl is sufficiently up-to-date.
|
||||||
|
_ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20)
|
||||||
|
)
|
||||||
|
|
||||||
|
// TelemetryData represents cluster-level telemetry information
|
||||||
|
type TelemetryData struct {
|
||||||
|
state protoimpl.MessageState
|
||||||
|
sizeCache protoimpl.SizeCache
|
||||||
|
unknownFields protoimpl.UnknownFields
|
||||||
|
|
||||||
|
// Unique cluster identifier (generated in-memory)
|
||||||
|
ClusterId string `protobuf:"bytes,1,opt,name=cluster_id,json=clusterId,proto3" json:"cluster_id,omitempty"`
|
||||||
|
// SeaweedFS version
|
||||||
|
Version string `protobuf:"bytes,2,opt,name=version,proto3" json:"version,omitempty"`
|
||||||
|
// Operating system (e.g., "linux/amd64")
|
||||||
|
Os string `protobuf:"bytes,3,opt,name=os,proto3" json:"os,omitempty"`
|
||||||
|
// Enabled features (e.g., ["filer", "s3api", "mq"])
|
||||||
|
Features []string `protobuf:"bytes,4,rep,name=features,proto3" json:"features,omitempty"`
|
||||||
|
// Deployment type ("standalone", "cluster", "master-only", "volume-only")
|
||||||
|
Deployment string `protobuf:"bytes,5,opt,name=deployment,proto3" json:"deployment,omitempty"`
|
||||||
|
// Number of volume servers in the cluster
|
||||||
|
VolumeServerCount int32 `protobuf:"varint,6,opt,name=volume_server_count,json=volumeServerCount,proto3" json:"volume_server_count,omitempty"`
|
||||||
|
// Total disk usage across all volume servers (in bytes)
|
||||||
|
TotalDiskBytes uint64 `protobuf:"varint,7,opt,name=total_disk_bytes,json=totalDiskBytes,proto3" json:"total_disk_bytes,omitempty"`
|
||||||
|
// Total number of volumes in the cluster
|
||||||
|
TotalVolumeCount int32 `protobuf:"varint,8,opt,name=total_volume_count,json=totalVolumeCount,proto3" json:"total_volume_count,omitempty"`
|
||||||
|
// Number of filer servers in the cluster
|
||||||
|
FilerCount int32 `protobuf:"varint,9,opt,name=filer_count,json=filerCount,proto3" json:"filer_count,omitempty"`
|
||||||
|
// Number of broker servers in the cluster
|
||||||
|
BrokerCount int32 `protobuf:"varint,10,opt,name=broker_count,json=brokerCount,proto3" json:"broker_count,omitempty"`
|
||||||
|
// Unix timestamp when the data was collected
|
||||||
|
Timestamp int64 `protobuf:"varint,11,opt,name=timestamp,proto3" json:"timestamp,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func (x *TelemetryData) Reset() {
|
||||||
|
*x = TelemetryData{}
|
||||||
|
if protoimpl.UnsafeEnabled {
|
||||||
|
mi := &file_proto_telemetry_proto_msgTypes[0]
|
||||||
|
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||||
|
ms.StoreMessageInfo(mi)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (x *TelemetryData) String() string {
|
||||||
|
return protoimpl.X.MessageStringOf(x)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (*TelemetryData) ProtoMessage() {}
|
||||||
|
|
||||||
|
func (x *TelemetryData) ProtoReflect() protoreflect.Message {
|
||||||
|
mi := &file_proto_telemetry_proto_msgTypes[0]
|
||||||
|
if protoimpl.UnsafeEnabled && x != nil {
|
||||||
|
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||||
|
if ms.LoadMessageInfo() == nil {
|
||||||
|
ms.StoreMessageInfo(mi)
|
||||||
|
}
|
||||||
|
return ms
|
||||||
|
}
|
||||||
|
return mi.MessageOf(x)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Deprecated: Use TelemetryData.ProtoReflect.Descriptor instead.
|
||||||
|
func (*TelemetryData) Descriptor() ([]byte, []int) {
|
||||||
|
return file_proto_telemetry_proto_rawDescGZIP(), []int{0}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (x *TelemetryData) GetClusterId() string {
|
||||||
|
if x != nil {
|
||||||
|
return x.ClusterId
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
func (x *TelemetryData) GetVersion() string {
|
||||||
|
if x != nil {
|
||||||
|
return x.Version
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
func (x *TelemetryData) GetOs() string {
|
||||||
|
if x != nil {
|
||||||
|
return x.Os
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
func (x *TelemetryData) GetFeatures() []string {
|
||||||
|
if x != nil {
|
||||||
|
return x.Features
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (x *TelemetryData) GetDeployment() string {
|
||||||
|
if x != nil {
|
||||||
|
return x.Deployment
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
func (x *TelemetryData) GetVolumeServerCount() int32 {
|
||||||
|
if x != nil {
|
||||||
|
return x.VolumeServerCount
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func (x *TelemetryData) GetTotalDiskBytes() uint64 {
|
||||||
|
if x != nil {
|
||||||
|
return x.TotalDiskBytes
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func (x *TelemetryData) GetTotalVolumeCount() int32 {
|
||||||
|
if x != nil {
|
||||||
|
return x.TotalVolumeCount
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func (x *TelemetryData) GetFilerCount() int32 {
|
||||||
|
if x != nil {
|
||||||
|
return x.FilerCount
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func (x *TelemetryData) GetBrokerCount() int32 {
|
||||||
|
if x != nil {
|
||||||
|
return x.BrokerCount
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func (x *TelemetryData) GetTimestamp() int64 {
|
||||||
|
if x != nil {
|
||||||
|
return x.Timestamp
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// TelemetryRequest is sent from SeaweedFS clusters to the telemetry server
|
||||||
|
type TelemetryRequest struct {
|
||||||
|
state protoimpl.MessageState
|
||||||
|
sizeCache protoimpl.SizeCache
|
||||||
|
unknownFields protoimpl.UnknownFields
|
||||||
|
|
||||||
|
Data *TelemetryData `protobuf:"bytes,1,opt,name=data,proto3" json:"data,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func (x *TelemetryRequest) Reset() {
|
||||||
|
*x = TelemetryRequest{}
|
||||||
|
if protoimpl.UnsafeEnabled {
|
||||||
|
mi := &file_proto_telemetry_proto_msgTypes[1]
|
||||||
|
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||||
|
ms.StoreMessageInfo(mi)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (x *TelemetryRequest) String() string {
|
||||||
|
return protoimpl.X.MessageStringOf(x)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (*TelemetryRequest) ProtoMessage() {}
|
||||||
|
|
||||||
|
func (x *TelemetryRequest) ProtoReflect() protoreflect.Message {
|
||||||
|
mi := &file_proto_telemetry_proto_msgTypes[1]
|
||||||
|
if protoimpl.UnsafeEnabled && x != nil {
|
||||||
|
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||||
|
if ms.LoadMessageInfo() == nil {
|
||||||
|
ms.StoreMessageInfo(mi)
|
||||||
|
}
|
||||||
|
return ms
|
||||||
|
}
|
||||||
|
return mi.MessageOf(x)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Deprecated: Use TelemetryRequest.ProtoReflect.Descriptor instead.
|
||||||
|
func (*TelemetryRequest) Descriptor() ([]byte, []int) {
|
||||||
|
return file_proto_telemetry_proto_rawDescGZIP(), []int{1}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (x *TelemetryRequest) GetData() *TelemetryData {
|
||||||
|
if x != nil {
|
||||||
|
return x.Data
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// TelemetryResponse is returned by the telemetry server
|
||||||
|
type TelemetryResponse struct {
|
||||||
|
state protoimpl.MessageState
|
||||||
|
sizeCache protoimpl.SizeCache
|
||||||
|
unknownFields protoimpl.UnknownFields
|
||||||
|
|
||||||
|
Success bool `protobuf:"varint,1,opt,name=success,proto3" json:"success,omitempty"`
|
||||||
|
Message string `protobuf:"bytes,2,opt,name=message,proto3" json:"message,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func (x *TelemetryResponse) Reset() {
|
||||||
|
*x = TelemetryResponse{}
|
||||||
|
if protoimpl.UnsafeEnabled {
|
||||||
|
mi := &file_proto_telemetry_proto_msgTypes[2]
|
||||||
|
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||||
|
ms.StoreMessageInfo(mi)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (x *TelemetryResponse) String() string {
|
||||||
|
return protoimpl.X.MessageStringOf(x)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (*TelemetryResponse) ProtoMessage() {}
|
||||||
|
|
||||||
|
func (x *TelemetryResponse) ProtoReflect() protoreflect.Message {
|
||||||
|
mi := &file_proto_telemetry_proto_msgTypes[2]
|
||||||
|
if protoimpl.UnsafeEnabled && x != nil {
|
||||||
|
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||||
|
if ms.LoadMessageInfo() == nil {
|
||||||
|
ms.StoreMessageInfo(mi)
|
||||||
|
}
|
||||||
|
return ms
|
||||||
|
}
|
||||||
|
return mi.MessageOf(x)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Deprecated: Use TelemetryResponse.ProtoReflect.Descriptor instead.
|
||||||
|
func (*TelemetryResponse) Descriptor() ([]byte, []int) {
|
||||||
|
return file_proto_telemetry_proto_rawDescGZIP(), []int{2}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (x *TelemetryResponse) GetSuccess() bool {
|
||||||
|
if x != nil {
|
||||||
|
return x.Success
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func (x *TelemetryResponse) GetMessage() string {
|
||||||
|
if x != nil {
|
||||||
|
return x.Message
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
var File_proto_telemetry_proto protoreflect.FileDescriptor
|
||||||
|
|
||||||
|
var file_proto_telemetry_proto_rawDesc = []byte{
|
||||||
|
0x0a, 0x15, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x74, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, 0x72,
|
||||||
|
0x79, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x09, 0x74, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74,
|
||||||
|
0x72, 0x79, 0x22, 0xfe, 0x02, 0x0a, 0x0d, 0x54, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, 0x72, 0x79,
|
||||||
|
0x44, 0x61, 0x74, 0x61, 0x12, 0x1d, 0x0a, 0x0a, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f,
|
||||||
|
0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65,
|
||||||
|
0x72, 0x49, 0x64, 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x02,
|
||||||
|
0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x0e, 0x0a,
|
||||||
|
0x02, 0x6f, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x6f, 0x73, 0x12, 0x1a, 0x0a,
|
||||||
|
0x08, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x09, 0x52,
|
||||||
|
0x08, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x12, 0x1e, 0x0a, 0x0a, 0x64, 0x65, 0x70,
|
||||||
|
0x6c, 0x6f, 0x79, 0x6d, 0x65, 0x6e, 0x74, 0x18, 0x05, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x64,
|
||||||
|
0x65, 0x70, 0x6c, 0x6f, 0x79, 0x6d, 0x65, 0x6e, 0x74, 0x12, 0x2e, 0x0a, 0x13, 0x76, 0x6f, 0x6c,
|
||||||
|
0x75, 0x6d, 0x65, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x5f, 0x63, 0x6f, 0x75, 0x6e, 0x74,
|
||||||
|
0x18, 0x06, 0x20, 0x01, 0x28, 0x05, 0x52, 0x11, 0x76, 0x6f, 0x6c, 0x75, 0x6d, 0x65, 0x53, 0x65,
|
||||||
|
0x72, 0x76, 0x65, 0x72, 0x43, 0x6f, 0x75, 0x6e, 0x74, 0x12, 0x28, 0x0a, 0x10, 0x74, 0x6f, 0x74,
|
||||||
|
0x61, 0x6c, 0x5f, 0x64, 0x69, 0x73, 0x6b, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x18, 0x07, 0x20,
|
||||||
|
0x01, 0x28, 0x04, 0x52, 0x0e, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x44, 0x69, 0x73, 0x6b, 0x42, 0x79,
|
||||||
|
0x74, 0x65, 0x73, 0x12, 0x2c, 0x0a, 0x12, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x76, 0x6f, 0x6c,
|
||||||
|
0x75, 0x6d, 0x65, 0x5f, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x08, 0x20, 0x01, 0x28, 0x05, 0x52,
|
||||||
|
0x10, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x56, 0x6f, 0x6c, 0x75, 0x6d, 0x65, 0x43, 0x6f, 0x75, 0x6e,
|
||||||
|
0x74, 0x12, 0x1f, 0x0a, 0x0b, 0x66, 0x69, 0x6c, 0x65, 0x72, 0x5f, 0x63, 0x6f, 0x75, 0x6e, 0x74,
|
||||||
|
0x18, 0x09, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0a, 0x66, 0x69, 0x6c, 0x65, 0x72, 0x43, 0x6f, 0x75,
|
||||||
|
0x6e, 0x74, 0x12, 0x21, 0x0a, 0x0c, 0x62, 0x72, 0x6f, 0x6b, 0x65, 0x72, 0x5f, 0x63, 0x6f, 0x75,
|
||||||
|
0x6e, 0x74, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, 0x62, 0x72, 0x6f, 0x6b, 0x65, 0x72,
|
||||||
|
0x43, 0x6f, 0x75, 0x6e, 0x74, 0x12, 0x1c, 0x0a, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61,
|
||||||
|
0x6d, 0x70, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74,
|
||||||
|
0x61, 0x6d, 0x70, 0x22, 0x40, 0x0a, 0x10, 0x54, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, 0x72, 0x79,
|
||||||
|
0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x2c, 0x0a, 0x04, 0x64, 0x61, 0x74, 0x61, 0x18,
|
||||||
|
0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x74, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, 0x72,
|
||||||
|
0x79, 0x2e, 0x54, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, 0x72, 0x79, 0x44, 0x61, 0x74, 0x61, 0x52,
|
||||||
|
0x04, 0x64, 0x61, 0x74, 0x61, 0x22, 0x47, 0x0a, 0x11, 0x54, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74,
|
||||||
|
0x72, 0x79, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x75,
|
||||||
|
0x63, 0x63, 0x65, 0x73, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x73, 0x75, 0x63,
|
||||||
|
0x63, 0x65, 0x73, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18,
|
||||||
|
0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x42, 0x30,
|
||||||
|
0x5a, 0x2e, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x73, 0x65, 0x61,
|
||||||
|
0x77, 0x65, 0x65, 0x64, 0x66, 0x73, 0x2f, 0x73, 0x65, 0x61, 0x77, 0x65, 0x65, 0x64, 0x66, 0x73,
|
||||||
|
0x2f, 0x74, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, 0x72, 0x79, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f,
|
||||||
|
0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33,
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
file_proto_telemetry_proto_rawDescOnce sync.Once
|
||||||
|
file_proto_telemetry_proto_rawDescData = file_proto_telemetry_proto_rawDesc
|
||||||
|
)
|
||||||
|
|
||||||
|
func file_proto_telemetry_proto_rawDescGZIP() []byte {
|
||||||
|
file_proto_telemetry_proto_rawDescOnce.Do(func() {
|
||||||
|
file_proto_telemetry_proto_rawDescData = protoimpl.X.CompressGZIP(file_proto_telemetry_proto_rawDescData)
|
||||||
|
})
|
||||||
|
return file_proto_telemetry_proto_rawDescData
|
||||||
|
}
|
||||||
|
|
||||||
|
var file_proto_telemetry_proto_msgTypes = make([]protoimpl.MessageInfo, 3)
|
||||||
|
var file_proto_telemetry_proto_goTypes = []any{
|
||||||
|
(*TelemetryData)(nil), // 0: telemetry.TelemetryData
|
||||||
|
(*TelemetryRequest)(nil), // 1: telemetry.TelemetryRequest
|
||||||
|
(*TelemetryResponse)(nil), // 2: telemetry.TelemetryResponse
|
||||||
|
}
|
||||||
|
var file_proto_telemetry_proto_depIdxs = []int32{
|
||||||
|
0, // 0: telemetry.TelemetryRequest.data:type_name -> telemetry.TelemetryData
|
||||||
|
1, // [1:1] is the sub-list for method output_type
|
||||||
|
1, // [1:1] is the sub-list for method input_type
|
||||||
|
1, // [1:1] is the sub-list for extension type_name
|
||||||
|
1, // [1:1] is the sub-list for extension extendee
|
||||||
|
0, // [0:1] is the sub-list for field type_name
|
||||||
|
}
|
||||||
|
|
||||||
|
func init() { file_proto_telemetry_proto_init() }
|
||||||
|
func file_proto_telemetry_proto_init() {
|
||||||
|
if File_proto_telemetry_proto != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if !protoimpl.UnsafeEnabled {
|
||||||
|
file_proto_telemetry_proto_msgTypes[0].Exporter = func(v any, i int) any {
|
||||||
|
switch v := v.(*TelemetryData); i {
|
||||||
|
case 0:
|
||||||
|
return &v.state
|
||||||
|
case 1:
|
||||||
|
return &v.sizeCache
|
||||||
|
case 2:
|
||||||
|
return &v.unknownFields
|
||||||
|
default:
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
file_proto_telemetry_proto_msgTypes[1].Exporter = func(v any, i int) any {
|
||||||
|
switch v := v.(*TelemetryRequest); i {
|
||||||
|
case 0:
|
||||||
|
return &v.state
|
||||||
|
case 1:
|
||||||
|
return &v.sizeCache
|
||||||
|
case 2:
|
||||||
|
return &v.unknownFields
|
||||||
|
default:
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
file_proto_telemetry_proto_msgTypes[2].Exporter = func(v any, i int) any {
|
||||||
|
switch v := v.(*TelemetryResponse); i {
|
||||||
|
case 0:
|
||||||
|
return &v.state
|
||||||
|
case 1:
|
||||||
|
return &v.sizeCache
|
||||||
|
case 2:
|
||||||
|
return &v.unknownFields
|
||||||
|
default:
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
type x struct{}
|
||||||
|
out := protoimpl.TypeBuilder{
|
||||||
|
File: protoimpl.DescBuilder{
|
||||||
|
GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
|
||||||
|
RawDescriptor: file_proto_telemetry_proto_rawDesc,
|
||||||
|
NumEnums: 0,
|
||||||
|
NumMessages: 3,
|
||||||
|
NumExtensions: 0,
|
||||||
|
NumServices: 0,
|
||||||
|
},
|
||||||
|
GoTypes: file_proto_telemetry_proto_goTypes,
|
||||||
|
DependencyIndexes: file_proto_telemetry_proto_depIdxs,
|
||||||
|
MessageInfos: file_proto_telemetry_proto_msgTypes,
|
||||||
|
}.Build()
|
||||||
|
File_proto_telemetry_proto = out.File
|
||||||
|
file_proto_telemetry_proto_rawDesc = nil
|
||||||
|
file_proto_telemetry_proto_goTypes = nil
|
||||||
|
file_proto_telemetry_proto_depIdxs = nil
|
||||||
|
}
|
||||||
52
telemetry/proto/telemetry.proto
Normal file
52
telemetry/proto/telemetry.proto
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
syntax = "proto3";
|
||||||
|
|
||||||
|
package telemetry;
|
||||||
|
|
||||||
|
option go_package = "github.com/seaweedfs/seaweedfs/telemetry/proto";
|
||||||
|
|
||||||
|
// TelemetryData represents cluster-level telemetry information
|
||||||
|
message TelemetryData {
|
||||||
|
// Unique cluster identifier (generated in-memory)
|
||||||
|
string cluster_id = 1;
|
||||||
|
|
||||||
|
// SeaweedFS version
|
||||||
|
string version = 2;
|
||||||
|
|
||||||
|
// Operating system (e.g., "linux/amd64")
|
||||||
|
string os = 3;
|
||||||
|
|
||||||
|
// Enabled features (e.g., ["filer", "s3api", "mq"])
|
||||||
|
repeated string features = 4;
|
||||||
|
|
||||||
|
// Deployment type ("standalone", "cluster", "master-only", "volume-only")
|
||||||
|
string deployment = 5;
|
||||||
|
|
||||||
|
// Number of volume servers in the cluster
|
||||||
|
int32 volume_server_count = 6;
|
||||||
|
|
||||||
|
// Total disk usage across all volume servers (in bytes)
|
||||||
|
uint64 total_disk_bytes = 7;
|
||||||
|
|
||||||
|
// Total number of volumes in the cluster
|
||||||
|
int32 total_volume_count = 8;
|
||||||
|
|
||||||
|
// Number of filer servers in the cluster
|
||||||
|
int32 filer_count = 9;
|
||||||
|
|
||||||
|
// Number of broker servers in the cluster
|
||||||
|
int32 broker_count = 10;
|
||||||
|
|
||||||
|
// Unix timestamp when the data was collected
|
||||||
|
int64 timestamp = 11;
|
||||||
|
}
|
||||||
|
|
||||||
|
// TelemetryRequest is sent from SeaweedFS clusters to the telemetry server
|
||||||
|
message TelemetryRequest {
|
||||||
|
TelemetryData data = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// TelemetryResponse is returned by the telemetry server
|
||||||
|
message TelemetryResponse {
|
||||||
|
bool success = 1;
|
||||||
|
string message = 2;
|
||||||
|
}
|
||||||
18
telemetry/server/Dockerfile
Normal file
18
telemetry/server/Dockerfile
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
FROM golang:1.21-alpine AS builder
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
COPY go.mod go.sum ./
|
||||||
|
RUN go mod download
|
||||||
|
|
||||||
|
COPY . .
|
||||||
|
RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -ldflags '-extldflags "-static"' -o telemetry-server .
|
||||||
|
|
||||||
|
FROM alpine:latest
|
||||||
|
RUN apk --no-cache add ca-certificates
|
||||||
|
WORKDIR /root/
|
||||||
|
|
||||||
|
COPY --from=builder /app/telemetry-server .
|
||||||
|
|
||||||
|
EXPOSE 8080
|
||||||
|
|
||||||
|
CMD ["./telemetry-server"]
|
||||||
97
telemetry/server/Makefile
Normal file
97
telemetry/server/Makefile
Normal file
@@ -0,0 +1,97 @@
|
|||||||
|
.PHONY: build run clean test deps proto integration-test test-all
|
||||||
|
|
||||||
|
# Build the telemetry server
|
||||||
|
build:
|
||||||
|
go build -o telemetry-server .
|
||||||
|
|
||||||
|
# Run the server in development mode
|
||||||
|
run:
|
||||||
|
go run . -port=8080 -dashboard=true -cleanup=1h -max-age=24h
|
||||||
|
|
||||||
|
# Run the server in production mode
|
||||||
|
run-prod:
|
||||||
|
./telemetry-server -port=8080 -dashboard=true -cleanup=24h -max-age=720h
|
||||||
|
|
||||||
|
# Clean build artifacts
|
||||||
|
clean:
|
||||||
|
rm -f telemetry-server
|
||||||
|
rm -f ../test/telemetry-server-test.log
|
||||||
|
go clean
|
||||||
|
|
||||||
|
# Run unit tests
|
||||||
|
test:
|
||||||
|
go test ./...
|
||||||
|
|
||||||
|
# Run integration tests
|
||||||
|
integration-test:
|
||||||
|
@echo "🧪 Running telemetry integration tests..."
|
||||||
|
cd ../../ && go run telemetry/test/integration.go
|
||||||
|
|
||||||
|
# Run all tests (unit + integration)
|
||||||
|
test-all: test integration-test
|
||||||
|
|
||||||
|
# Install dependencies
|
||||||
|
deps:
|
||||||
|
go mod download
|
||||||
|
go mod tidy
|
||||||
|
|
||||||
|
# Generate protobuf code (requires protoc)
|
||||||
|
proto:
|
||||||
|
cd .. && protoc --go_out=. --go_opt=paths=source_relative proto/telemetry.proto
|
||||||
|
|
||||||
|
# Build Docker image
|
||||||
|
docker-build:
|
||||||
|
docker build -t seaweedfs-telemetry .
|
||||||
|
|
||||||
|
# Run with Docker
|
||||||
|
docker-run:
|
||||||
|
docker run -p 8080:8080 seaweedfs-telemetry -port=8080 -dashboard=true
|
||||||
|
|
||||||
|
# Development with auto-reload (requires air: go install github.com/cosmtrek/air@latest)
|
||||||
|
dev:
|
||||||
|
air
|
||||||
|
|
||||||
|
# Check if protoc is available
|
||||||
|
check-protoc:
|
||||||
|
@which protoc > /dev/null || (echo "protoc is required for proto generation. Install from https://grpc.io/docs/protoc-installation/" && exit 1)
|
||||||
|
|
||||||
|
# Full development setup
|
||||||
|
setup: check-protoc deps proto build
|
||||||
|
|
||||||
|
# Run a quick smoke test
|
||||||
|
smoke-test: build
|
||||||
|
@echo "🔥 Running smoke test..."
|
||||||
|
@timeout 10s ./telemetry-server -port=18081 > /dev/null 2>&1 & \
|
||||||
|
SERVER_PID=$$!; \
|
||||||
|
sleep 2; \
|
||||||
|
if curl -s http://localhost:18081/health > /dev/null; then \
|
||||||
|
echo "✅ Smoke test passed - server responds to health check"; \
|
||||||
|
else \
|
||||||
|
echo "❌ Smoke test failed - server not responding"; \
|
||||||
|
exit 1; \
|
||||||
|
fi; \
|
||||||
|
kill $$SERVER_PID 2>/dev/null || true
|
||||||
|
|
||||||
|
# Continuous integration target
|
||||||
|
ci: deps proto build test integration-test
|
||||||
|
@echo "🎉 All CI tests passed!"
|
||||||
|
|
||||||
|
# Help
|
||||||
|
help:
|
||||||
|
@echo "Available targets:"
|
||||||
|
@echo " build - Build the telemetry server binary"
|
||||||
|
@echo " run - Run server in development mode"
|
||||||
|
@echo " run-prod - Run server in production mode"
|
||||||
|
@echo " clean - Clean build artifacts"
|
||||||
|
@echo " test - Run unit tests"
|
||||||
|
@echo " integration-test- Run integration tests"
|
||||||
|
@echo " test-all - Run all tests (unit + integration)"
|
||||||
|
@echo " deps - Install Go dependencies"
|
||||||
|
@echo " proto - Generate protobuf code"
|
||||||
|
@echo " docker-build - Build Docker image"
|
||||||
|
@echo " docker-run - Run with Docker"
|
||||||
|
@echo " dev - Run with auto-reload (requires air)"
|
||||||
|
@echo " smoke-test - Quick server health check"
|
||||||
|
@echo " setup - Full development setup"
|
||||||
|
@echo " ci - Continuous integration (all tests)"
|
||||||
|
@echo " help - Show this help"
|
||||||
152
telemetry/server/api/handlers.go
Normal file
152
telemetry/server/api/handlers.go
Normal file
@@ -0,0 +1,152 @@
|
|||||||
|
package api
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"strconv"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/seaweedfs/seaweedfs/telemetry/proto"
|
||||||
|
"github.com/seaweedfs/seaweedfs/telemetry/server/storage"
|
||||||
|
protobuf "google.golang.org/protobuf/proto"
|
||||||
|
)
|
||||||
|
|
||||||
|
type Handler struct {
|
||||||
|
storage *storage.PrometheusStorage
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewHandler(storage *storage.PrometheusStorage) *Handler {
|
||||||
|
return &Handler{storage: storage}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *Handler) CollectTelemetry(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if r.Method != http.MethodPost {
|
||||||
|
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
contentType := r.Header.Get("Content-Type")
|
||||||
|
|
||||||
|
// Only accept protobuf content type
|
||||||
|
if contentType != "application/x-protobuf" && contentType != "application/protobuf" {
|
||||||
|
http.Error(w, "Content-Type must be application/x-protobuf", http.StatusUnsupportedMediaType)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read protobuf request
|
||||||
|
body, err := io.ReadAll(r.Body)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Failed to read request body", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
req := &proto.TelemetryRequest{}
|
||||||
|
if err := protobuf.Unmarshal(body, req); err != nil {
|
||||||
|
http.Error(w, "Invalid protobuf data", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
data := req.Data
|
||||||
|
if data == nil {
|
||||||
|
http.Error(w, "Missing telemetry data", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Validate required fields
|
||||||
|
if data.ClusterId == "" || data.Version == "" || data.Os == "" {
|
||||||
|
http.Error(w, "Missing required fields", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set timestamp if not provided
|
||||||
|
if data.Timestamp == 0 {
|
||||||
|
data.Timestamp = time.Now().Unix()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Store the telemetry data
|
||||||
|
if err := h.storage.StoreTelemetry(data); err != nil {
|
||||||
|
http.Error(w, "Failed to store data", http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return protobuf response
|
||||||
|
resp := &proto.TelemetryResponse{
|
||||||
|
Success: true,
|
||||||
|
Message: "Telemetry data received",
|
||||||
|
}
|
||||||
|
|
||||||
|
respData, err := protobuf.Marshal(resp)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Failed to marshal response", http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "application/x-protobuf")
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
w.Write(respData)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *Handler) GetStats(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if r.Method != http.MethodGet {
|
||||||
|
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
stats, err := h.storage.GetStats()
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Failed to get stats", http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
json.NewEncoder(w).Encode(stats)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *Handler) GetInstances(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if r.Method != http.MethodGet {
|
||||||
|
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
limitStr := r.URL.Query().Get("limit")
|
||||||
|
limit := 100 // default
|
||||||
|
if limitStr != "" {
|
||||||
|
if l, err := strconv.Atoi(limitStr); err == nil && l > 0 && l <= 1000 {
|
||||||
|
limit = l
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
instances, err := h.storage.GetInstances(limit)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Failed to get instances", http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
json.NewEncoder(w).Encode(instances)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *Handler) GetMetrics(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if r.Method != http.MethodGet {
|
||||||
|
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
daysStr := r.URL.Query().Get("days")
|
||||||
|
days := 30 // default
|
||||||
|
if daysStr != "" {
|
||||||
|
if d, err := strconv.Atoi(daysStr); err == nil && d > 0 && d <= 365 {
|
||||||
|
days = d
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
metrics, err := h.storage.GetMetrics(days)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Failed to get metrics", http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
json.NewEncoder(w).Encode(metrics)
|
||||||
|
}
|
||||||
278
telemetry/server/dashboard/dashboard.go
Normal file
278
telemetry/server/dashboard/dashboard.go
Normal file
@@ -0,0 +1,278 @@
|
|||||||
|
package dashboard
|
||||||
|
|
||||||
|
import (
|
||||||
|
"net/http"
|
||||||
|
)
|
||||||
|
|
||||||
|
type Handler struct{}
|
||||||
|
|
||||||
|
func NewHandler() *Handler {
|
||||||
|
return &Handler{}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *Handler) ServeIndex(w http.ResponseWriter, r *http.Request) {
|
||||||
|
html := `<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
<title>SeaweedFS Telemetry Dashboard</title>
|
||||||
|
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
|
||||||
|
<style>
|
||||||
|
body {
|
||||||
|
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
|
||||||
|
margin: 0;
|
||||||
|
padding: 20px;
|
||||||
|
background-color: #f5f5f5;
|
||||||
|
}
|
||||||
|
.container {
|
||||||
|
max-width: 1200px;
|
||||||
|
margin: 0 auto;
|
||||||
|
}
|
||||||
|
.header {
|
||||||
|
background: white;
|
||||||
|
padding: 20px;
|
||||||
|
border-radius: 8px;
|
||||||
|
margin-bottom: 20px;
|
||||||
|
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
||||||
|
}
|
||||||
|
.stats-grid {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
|
||||||
|
gap: 20px;
|
||||||
|
margin-bottom: 20px;
|
||||||
|
}
|
||||||
|
.stat-card {
|
||||||
|
background: white;
|
||||||
|
padding: 20px;
|
||||||
|
border-radius: 8px;
|
||||||
|
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
||||||
|
}
|
||||||
|
.stat-value {
|
||||||
|
font-size: 2em;
|
||||||
|
font-weight: bold;
|
||||||
|
color: #2196F3;
|
||||||
|
}
|
||||||
|
.stat-label {
|
||||||
|
color: #666;
|
||||||
|
margin-top: 5px;
|
||||||
|
}
|
||||||
|
.chart-container {
|
||||||
|
background: white;
|
||||||
|
padding: 20px;
|
||||||
|
border-radius: 8px;
|
||||||
|
margin-bottom: 20px;
|
||||||
|
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
||||||
|
}
|
||||||
|
.chart-title {
|
||||||
|
font-size: 1.2em;
|
||||||
|
font-weight: bold;
|
||||||
|
margin-bottom: 15px;
|
||||||
|
}
|
||||||
|
.loading {
|
||||||
|
text-align: center;
|
||||||
|
padding: 40px;
|
||||||
|
color: #666;
|
||||||
|
}
|
||||||
|
.error {
|
||||||
|
background: #ffebee;
|
||||||
|
color: #c62828;
|
||||||
|
padding: 15px;
|
||||||
|
border-radius: 4px;
|
||||||
|
margin: 10px 0;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<div class="container">
|
||||||
|
<div class="header">
|
||||||
|
<h1>SeaweedFS Telemetry Dashboard</h1>
|
||||||
|
<p>Privacy-respecting usage analytics for SeaweedFS</p>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div id="loading" class="loading">Loading telemetry data...</div>
|
||||||
|
<div id="error" class="error" style="display: none;"></div>
|
||||||
|
|
||||||
|
<div id="dashboard" style="display: none;">
|
||||||
|
<div class="stats-grid">
|
||||||
|
<div class="stat-card">
|
||||||
|
<div class="stat-value" id="totalInstances">-</div>
|
||||||
|
<div class="stat-label">Total Instances (30 days)</div>
|
||||||
|
</div>
|
||||||
|
<div class="stat-card">
|
||||||
|
<div class="stat-value" id="activeInstances">-</div>
|
||||||
|
<div class="stat-label">Active Instances (7 days)</div>
|
||||||
|
</div>
|
||||||
|
<div class="stat-card">
|
||||||
|
<div class="stat-value" id="totalVersions">-</div>
|
||||||
|
<div class="stat-label">Different Versions</div>
|
||||||
|
</div>
|
||||||
|
<div class="stat-card">
|
||||||
|
<div class="stat-value" id="totalOS">-</div>
|
||||||
|
<div class="stat-label">Operating Systems</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="chart-container">
|
||||||
|
<div class="chart-title">Version Distribution</div>
|
||||||
|
<canvas id="versionChart" width="400" height="200"></canvas>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="chart-container">
|
||||||
|
<div class="chart-title">Operating System Distribution</div>
|
||||||
|
<canvas id="osChart" width="400" height="200"></canvas>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="chart-container">
|
||||||
|
<div class="chart-title">Deployment Types</div>
|
||||||
|
<canvas id="deploymentChart" width="400" height="200"></canvas>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="chart-container">
|
||||||
|
<div class="chart-title">Volume Servers Over Time</div>
|
||||||
|
<canvas id="serverChart" width="400" height="200"></canvas>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="chart-container">
|
||||||
|
<div class="chart-title">Total Disk Usage Over Time</div>
|
||||||
|
<canvas id="diskChart" width="400" height="200"></canvas>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
let charts = {};
|
||||||
|
|
||||||
|
async function loadDashboard() {
|
||||||
|
try {
|
||||||
|
// Load stats
|
||||||
|
const statsResponse = await fetch('/api/stats');
|
||||||
|
const stats = await statsResponse.json();
|
||||||
|
|
||||||
|
// Load metrics
|
||||||
|
const metricsResponse = await fetch('/api/metrics?days=30');
|
||||||
|
const metrics = await metricsResponse.json();
|
||||||
|
|
||||||
|
updateStats(stats);
|
||||||
|
updateCharts(stats, metrics);
|
||||||
|
|
||||||
|
document.getElementById('loading').style.display = 'none';
|
||||||
|
document.getElementById('dashboard').style.display = 'block';
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Error loading dashboard:', error);
|
||||||
|
showError('Failed to load telemetry data: ' + error.message);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function updateStats(stats) {
|
||||||
|
document.getElementById('totalInstances').textContent = stats.total_instances || 0;
|
||||||
|
document.getElementById('activeInstances').textContent = stats.active_instances || 0;
|
||||||
|
document.getElementById('totalVersions').textContent = Object.keys(stats.versions || {}).length;
|
||||||
|
document.getElementById('totalOS').textContent = Object.keys(stats.os_distribution || {}).length;
|
||||||
|
}
|
||||||
|
|
||||||
|
function updateCharts(stats, metrics) {
|
||||||
|
// Version chart
|
||||||
|
createPieChart('versionChart', 'Version Distribution', stats.versions || {});
|
||||||
|
|
||||||
|
// OS chart
|
||||||
|
createPieChart('osChart', 'Operating System Distribution', stats.os_distribution || {});
|
||||||
|
|
||||||
|
// Deployment chart
|
||||||
|
createPieChart('deploymentChart', 'Deployment Types', stats.deployments || {});
|
||||||
|
|
||||||
|
// Server count over time
|
||||||
|
if (metrics.dates && metrics.server_counts) {
|
||||||
|
createLineChart('serverChart', 'Volume Servers', metrics.dates, metrics.server_counts, '#2196F3');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Disk usage over time
|
||||||
|
if (metrics.dates && metrics.disk_usage) {
|
||||||
|
const diskUsageGB = metrics.disk_usage.map(bytes => Math.round(bytes / (1024 * 1024 * 1024)));
|
||||||
|
createLineChart('diskChart', 'Disk Usage (GB)', metrics.dates, diskUsageGB, '#4CAF50');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function createPieChart(canvasId, title, data) {
|
||||||
|
const ctx = document.getElementById(canvasId).getContext('2d');
|
||||||
|
|
||||||
|
if (charts[canvasId]) {
|
||||||
|
charts[canvasId].destroy();
|
||||||
|
}
|
||||||
|
|
||||||
|
const labels = Object.keys(data);
|
||||||
|
const values = Object.values(data);
|
||||||
|
|
||||||
|
charts[canvasId] = new Chart(ctx, {
|
||||||
|
type: 'pie',
|
||||||
|
data: {
|
||||||
|
labels: labels,
|
||||||
|
datasets: [{
|
||||||
|
data: values,
|
||||||
|
backgroundColor: [
|
||||||
|
'#FF6384', '#36A2EB', '#FFCE56', '#4BC0C0',
|
||||||
|
'#9966FF', '#FF9F40', '#FF6384', '#C9CBCF'
|
||||||
|
]
|
||||||
|
}]
|
||||||
|
},
|
||||||
|
options: {
|
||||||
|
responsive: true,
|
||||||
|
plugins: {
|
||||||
|
legend: {
|
||||||
|
position: 'bottom'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
function createLineChart(canvasId, label, labels, data, color) {
|
||||||
|
const ctx = document.getElementById(canvasId).getContext('2d');
|
||||||
|
|
||||||
|
if (charts[canvasId]) {
|
||||||
|
charts[canvasId].destroy();
|
||||||
|
}
|
||||||
|
|
||||||
|
charts[canvasId] = new Chart(ctx, {
|
||||||
|
type: 'line',
|
||||||
|
data: {
|
||||||
|
labels: labels,
|
||||||
|
datasets: [{
|
||||||
|
label: label,
|
||||||
|
data: data,
|
||||||
|
borderColor: color,
|
||||||
|
backgroundColor: color + '20',
|
||||||
|
fill: true,
|
||||||
|
tension: 0.1
|
||||||
|
}]
|
||||||
|
},
|
||||||
|
options: {
|
||||||
|
responsive: true,
|
||||||
|
scales: {
|
||||||
|
y: {
|
||||||
|
beginAtZero: true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
function showError(message) {
|
||||||
|
document.getElementById('loading').style.display = 'none';
|
||||||
|
document.getElementById('error').style.display = 'block';
|
||||||
|
document.getElementById('error').textContent = message;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load dashboard on page load
|
||||||
|
loadDashboard();
|
||||||
|
|
||||||
|
// Refresh every 5 minutes
|
||||||
|
setInterval(loadDashboard, 5 * 60 * 1000);
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
</html>`
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "text/html")
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
w.Write([]byte(html))
|
||||||
|
}
|
||||||
31
telemetry/server/go.sum
Normal file
31
telemetry/server/go.sum
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
|
||||||
|
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
|
||||||
|
github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44=
|
||||||
|
github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||||
|
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||||
|
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
|
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
||||||
|
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
|
||||||
|
github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
|
||||||
|
github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
|
||||||
|
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||||
|
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
|
||||||
|
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||||
|
github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo=
|
||||||
|
github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4=
|
||||||
|
github.com/prometheus/client_golang v1.17.0 h1:rl2sfwZMtSthVU752MqfjQozy7blglC+1SOtjMAMh+Q=
|
||||||
|
github.com/prometheus/client_golang v1.17.0/go.mod h1:VeL+gMmOAxkS2IqfCq0ZmHSL+LjWfWDUmp1mBz9JgUY=
|
||||||
|
github.com/prometheus/client_model v0.4.1-0.20230718164431-9a2bf3000d16 h1:v7DLqVdK4VrYkVD5diGdl4sxJurKJEMnODWRJlxV9oM=
|
||||||
|
github.com/prometheus/client_model v0.4.1-0.20230718164431-9a2bf3000d16/go.mod h1:oMQmHW1/JoDwqLtg57MGgP/Fb1CJEYF2imWWhWtMkYU=
|
||||||
|
github.com/prometheus/common v0.44.0 h1:+5BrQJwiBB9xsMygAB3TNvpQKOwlkc25LbISbrdOOfY=
|
||||||
|
github.com/prometheus/common v0.44.0/go.mod h1:ofAIvZbQ1e/nugmZGz4/qCb9Ap1VoSTIO7x0VV9VvuY=
|
||||||
|
github.com/prometheus/procfs v0.11.1 h1:xRC8Iq1yyca5ypa9n1EZnWZkt7dwcoRPQwX/5gwaUuI=
|
||||||
|
github.com/prometheus/procfs v0.11.1/go.mod h1:eesXgaPo1q7lBpVMoMy0ZOFTth9hBn4W/y0/p/ScXhY=
|
||||||
|
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||||
|
golang.org/x/sys v0.11.0 h1:eG7RXZHdqOJ1i+0lgLgCpSXAp6M3LYlAo6osgSi0xOM=
|
||||||
|
golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||||
|
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
|
||||||
|
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
|
||||||
|
google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8=
|
||||||
|
google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
|
||||||
111
telemetry/server/main.go
Normal file
111
telemetry/server/main.go
Normal file
@@ -0,0 +1,111 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"log"
|
||||||
|
"net/http"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||||
|
"github.com/seaweedfs/seaweedfs/telemetry/server/api"
|
||||||
|
"github.com/seaweedfs/seaweedfs/telemetry/server/dashboard"
|
||||||
|
"github.com/seaweedfs/seaweedfs/telemetry/server/storage"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
port = flag.Int("port", 8080, "HTTP server port")
|
||||||
|
enableCORS = flag.Bool("cors", true, "Enable CORS for dashboard")
|
||||||
|
logRequests = flag.Bool("log", true, "Log incoming requests")
|
||||||
|
enableDashboard = flag.Bool("dashboard", true, "Enable built-in dashboard (optional when using Grafana)")
|
||||||
|
cleanupInterval = flag.Duration("cleanup", 24*time.Hour, "Cleanup interval for old instances")
|
||||||
|
maxInstanceAge = flag.Duration("max-age", 30*24*time.Hour, "Maximum age for instances before cleanup")
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
flag.Parse()
|
||||||
|
|
||||||
|
// Create Prometheus storage instance
|
||||||
|
store := storage.NewPrometheusStorage()
|
||||||
|
|
||||||
|
// Start cleanup routine
|
||||||
|
go func() {
|
||||||
|
ticker := time.NewTicker(*cleanupInterval)
|
||||||
|
defer ticker.Stop()
|
||||||
|
for range ticker.C {
|
||||||
|
store.CleanupOldInstances(*maxInstanceAge)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
// Setup HTTP handlers
|
||||||
|
mux := http.NewServeMux()
|
||||||
|
|
||||||
|
// Prometheus metrics endpoint
|
||||||
|
mux.Handle("/metrics", promhttp.Handler())
|
||||||
|
|
||||||
|
// API endpoints
|
||||||
|
apiHandler := api.NewHandler(store)
|
||||||
|
mux.HandleFunc("/api/collect", corsMiddleware(logMiddleware(apiHandler.CollectTelemetry)))
|
||||||
|
mux.HandleFunc("/api/stats", corsMiddleware(logMiddleware(apiHandler.GetStats)))
|
||||||
|
mux.HandleFunc("/api/instances", corsMiddleware(logMiddleware(apiHandler.GetInstances)))
|
||||||
|
mux.HandleFunc("/api/metrics", corsMiddleware(logMiddleware(apiHandler.GetMetrics)))
|
||||||
|
|
||||||
|
// Dashboard (optional)
|
||||||
|
if *enableDashboard {
|
||||||
|
dashboardHandler := dashboard.NewHandler()
|
||||||
|
mux.HandleFunc("/", corsMiddleware(dashboardHandler.ServeIndex))
|
||||||
|
mux.HandleFunc("/dashboard", corsMiddleware(dashboardHandler.ServeIndex))
|
||||||
|
mux.Handle("/static/", http.StripPrefix("/static/", http.FileServer(http.Dir("./static"))))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Health check
|
||||||
|
mux.HandleFunc("/health", func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
json.NewEncoder(w).Encode(map[string]string{
|
||||||
|
"status": "ok",
|
||||||
|
"time": time.Now().UTC().Format(time.RFC3339),
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
addr := fmt.Sprintf(":%d", *port)
|
||||||
|
log.Printf("Starting telemetry server on %s", addr)
|
||||||
|
log.Printf("Prometheus metrics: http://localhost%s/metrics", addr)
|
||||||
|
if *enableDashboard {
|
||||||
|
log.Printf("Dashboard: http://localhost%s/dashboard", addr)
|
||||||
|
}
|
||||||
|
log.Printf("Cleanup interval: %v, Max instance age: %v", *cleanupInterval, *maxInstanceAge)
|
||||||
|
|
||||||
|
if err := http.ListenAndServe(addr, mux); err != nil {
|
||||||
|
log.Fatalf("Server failed: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func corsMiddleware(next http.HandlerFunc) http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if *enableCORS {
|
||||||
|
w.Header().Set("Access-Control-Allow-Origin", "*")
|
||||||
|
w.Header().Set("Access-Control-Allow-Methods", "GET, POST, OPTIONS")
|
||||||
|
w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization")
|
||||||
|
}
|
||||||
|
|
||||||
|
if r.Method == "OPTIONS" {
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
next(w, r)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func logMiddleware(next http.HandlerFunc) http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if *logRequests {
|
||||||
|
start := time.Now()
|
||||||
|
next(w, r)
|
||||||
|
log.Printf("%s %s %s %v", r.Method, r.URL.Path, r.RemoteAddr, time.Since(start))
|
||||||
|
} else {
|
||||||
|
next(w, r)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
245
telemetry/server/storage/prometheus.go
Normal file
245
telemetry/server/storage/prometheus.go
Normal file
@@ -0,0 +1,245 @@
|
|||||||
|
package storage
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
|
"github.com/prometheus/client_golang/prometheus/promauto"
|
||||||
|
"github.com/seaweedfs/seaweedfs/telemetry/proto"
|
||||||
|
)
|
||||||
|
|
||||||
|
type PrometheusStorage struct {
|
||||||
|
// Prometheus metrics
|
||||||
|
totalClusters prometheus.Gauge
|
||||||
|
activeClusters prometheus.Gauge
|
||||||
|
volumeServerCount *prometheus.GaugeVec
|
||||||
|
totalDiskBytes *prometheus.GaugeVec
|
||||||
|
totalVolumeCount *prometheus.GaugeVec
|
||||||
|
filerCount *prometheus.GaugeVec
|
||||||
|
brokerCount *prometheus.GaugeVec
|
||||||
|
clusterInfo *prometheus.GaugeVec
|
||||||
|
telemetryReceived prometheus.Counter
|
||||||
|
|
||||||
|
// In-memory storage for API endpoints (if needed)
|
||||||
|
mu sync.RWMutex
|
||||||
|
instances map[string]*telemetryData
|
||||||
|
stats map[string]interface{}
|
||||||
|
}
|
||||||
|
|
||||||
|
// telemetryData is an internal struct that includes the received timestamp
|
||||||
|
type telemetryData struct {
|
||||||
|
*proto.TelemetryData
|
||||||
|
ReceivedAt time.Time `json:"received_at"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewPrometheusStorage() *PrometheusStorage {
|
||||||
|
return &PrometheusStorage{
|
||||||
|
totalClusters: promauto.NewGauge(prometheus.GaugeOpts{
|
||||||
|
Name: "seaweedfs_telemetry_total_clusters",
|
||||||
|
Help: "Total number of unique SeaweedFS clusters (last 30 days)",
|
||||||
|
}),
|
||||||
|
activeClusters: promauto.NewGauge(prometheus.GaugeOpts{
|
||||||
|
Name: "seaweedfs_telemetry_active_clusters",
|
||||||
|
Help: "Number of active SeaweedFS clusters (last 7 days)",
|
||||||
|
}),
|
||||||
|
volumeServerCount: promauto.NewGaugeVec(prometheus.GaugeOpts{
|
||||||
|
Name: "seaweedfs_telemetry_volume_servers",
|
||||||
|
Help: "Number of volume servers per cluster",
|
||||||
|
}, []string{"cluster_id", "version", "os", "deployment"}),
|
||||||
|
totalDiskBytes: promauto.NewGaugeVec(prometheus.GaugeOpts{
|
||||||
|
Name: "seaweedfs_telemetry_disk_bytes",
|
||||||
|
Help: "Total disk usage in bytes per cluster",
|
||||||
|
}, []string{"cluster_id", "version", "os", "deployment"}),
|
||||||
|
totalVolumeCount: promauto.NewGaugeVec(prometheus.GaugeOpts{
|
||||||
|
Name: "seaweedfs_telemetry_volume_count",
|
||||||
|
Help: "Total number of volumes per cluster",
|
||||||
|
}, []string{"cluster_id", "version", "os", "deployment"}),
|
||||||
|
filerCount: promauto.NewGaugeVec(prometheus.GaugeOpts{
|
||||||
|
Name: "seaweedfs_telemetry_filer_count",
|
||||||
|
Help: "Number of filer servers per cluster",
|
||||||
|
}, []string{"cluster_id", "version", "os", "deployment"}),
|
||||||
|
brokerCount: promauto.NewGaugeVec(prometheus.GaugeOpts{
|
||||||
|
Name: "seaweedfs_telemetry_broker_count",
|
||||||
|
Help: "Number of broker servers per cluster",
|
||||||
|
}, []string{"cluster_id", "version", "os", "deployment"}),
|
||||||
|
clusterInfo: promauto.NewGaugeVec(prometheus.GaugeOpts{
|
||||||
|
Name: "seaweedfs_telemetry_cluster_info",
|
||||||
|
Help: "Cluster information (always 1, labels contain metadata)",
|
||||||
|
}, []string{"cluster_id", "version", "os", "deployment", "features"}),
|
||||||
|
telemetryReceived: promauto.NewCounter(prometheus.CounterOpts{
|
||||||
|
Name: "seaweedfs_telemetry_reports_received_total",
|
||||||
|
Help: "Total number of telemetry reports received",
|
||||||
|
}),
|
||||||
|
instances: make(map[string]*telemetryData),
|
||||||
|
stats: make(map[string]interface{}),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *PrometheusStorage) StoreTelemetry(data *proto.TelemetryData) error {
|
||||||
|
s.mu.Lock()
|
||||||
|
defer s.mu.Unlock()
|
||||||
|
|
||||||
|
// Update Prometheus metrics
|
||||||
|
labels := prometheus.Labels{
|
||||||
|
"cluster_id": data.ClusterId,
|
||||||
|
"version": data.Version,
|
||||||
|
"os": data.Os,
|
||||||
|
"deployment": data.Deployment,
|
||||||
|
}
|
||||||
|
|
||||||
|
s.volumeServerCount.With(labels).Set(float64(data.VolumeServerCount))
|
||||||
|
s.totalDiskBytes.With(labels).Set(float64(data.TotalDiskBytes))
|
||||||
|
s.totalVolumeCount.With(labels).Set(float64(data.TotalVolumeCount))
|
||||||
|
s.filerCount.With(labels).Set(float64(data.FilerCount))
|
||||||
|
s.brokerCount.With(labels).Set(float64(data.BrokerCount))
|
||||||
|
|
||||||
|
// Features as JSON string for the label
|
||||||
|
featuresJSON, _ := json.Marshal(data.Features)
|
||||||
|
infoLabels := prometheus.Labels{
|
||||||
|
"cluster_id": data.ClusterId,
|
||||||
|
"version": data.Version,
|
||||||
|
"os": data.Os,
|
||||||
|
"deployment": data.Deployment,
|
||||||
|
"features": string(featuresJSON),
|
||||||
|
}
|
||||||
|
s.clusterInfo.With(infoLabels).Set(1)
|
||||||
|
|
||||||
|
s.telemetryReceived.Inc()
|
||||||
|
|
||||||
|
// Store in memory for API endpoints
|
||||||
|
s.instances[data.ClusterId] = &telemetryData{
|
||||||
|
TelemetryData: data,
|
||||||
|
ReceivedAt: time.Now().UTC(),
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update aggregated stats
|
||||||
|
s.updateStats()
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *PrometheusStorage) GetStats() (map[string]interface{}, error) {
|
||||||
|
s.mu.RLock()
|
||||||
|
defer s.mu.RUnlock()
|
||||||
|
|
||||||
|
// Return cached stats
|
||||||
|
result := make(map[string]interface{})
|
||||||
|
for k, v := range s.stats {
|
||||||
|
result[k] = v
|
||||||
|
}
|
||||||
|
return result, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *PrometheusStorage) GetInstances(limit int) ([]*telemetryData, error) {
|
||||||
|
s.mu.RLock()
|
||||||
|
defer s.mu.RUnlock()
|
||||||
|
|
||||||
|
var instances []*telemetryData
|
||||||
|
count := 0
|
||||||
|
for _, instance := range s.instances {
|
||||||
|
if count >= limit {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
instances = append(instances, instance)
|
||||||
|
count++
|
||||||
|
}
|
||||||
|
|
||||||
|
return instances, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *PrometheusStorage) GetMetrics(days int) (map[string]interface{}, error) {
|
||||||
|
s.mu.RLock()
|
||||||
|
defer s.mu.RUnlock()
|
||||||
|
|
||||||
|
// Return current metrics from in-memory storage
|
||||||
|
// Historical data should be queried from Prometheus directly
|
||||||
|
cutoff := time.Now().AddDate(0, 0, -days)
|
||||||
|
|
||||||
|
var volumeServers []map[string]interface{}
|
||||||
|
var diskUsage []map[string]interface{}
|
||||||
|
|
||||||
|
for _, instance := range s.instances {
|
||||||
|
if instance.ReceivedAt.After(cutoff) {
|
||||||
|
volumeServers = append(volumeServers, map[string]interface{}{
|
||||||
|
"date": instance.ReceivedAt.Format("2006-01-02"),
|
||||||
|
"value": instance.TelemetryData.VolumeServerCount,
|
||||||
|
})
|
||||||
|
diskUsage = append(diskUsage, map[string]interface{}{
|
||||||
|
"date": instance.ReceivedAt.Format("2006-01-02"),
|
||||||
|
"value": instance.TelemetryData.TotalDiskBytes,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return map[string]interface{}{
|
||||||
|
"volume_servers": volumeServers,
|
||||||
|
"disk_usage": diskUsage,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *PrometheusStorage) updateStats() {
|
||||||
|
now := time.Now()
|
||||||
|
last7Days := now.AddDate(0, 0, -7)
|
||||||
|
last30Days := now.AddDate(0, 0, -30)
|
||||||
|
|
||||||
|
totalInstances := 0
|
||||||
|
activeInstances := 0
|
||||||
|
versions := make(map[string]int)
|
||||||
|
osDistribution := make(map[string]int)
|
||||||
|
deployments := make(map[string]int)
|
||||||
|
|
||||||
|
for _, instance := range s.instances {
|
||||||
|
if instance.ReceivedAt.After(last30Days) {
|
||||||
|
totalInstances++
|
||||||
|
}
|
||||||
|
if instance.ReceivedAt.After(last7Days) {
|
||||||
|
activeInstances++
|
||||||
|
versions[instance.TelemetryData.Version]++
|
||||||
|
osDistribution[instance.TelemetryData.Os]++
|
||||||
|
deployments[instance.TelemetryData.Deployment]++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update Prometheus gauges
|
||||||
|
s.totalClusters.Set(float64(totalInstances))
|
||||||
|
s.activeClusters.Set(float64(activeInstances))
|
||||||
|
|
||||||
|
// Update cached stats for API
|
||||||
|
s.stats = map[string]interface{}{
|
||||||
|
"total_instances": totalInstances,
|
||||||
|
"active_instances": activeInstances,
|
||||||
|
"versions": versions,
|
||||||
|
"os_distribution": osDistribution,
|
||||||
|
"deployments": deployments,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// CleanupOldInstances removes instances older than the specified duration
|
||||||
|
func (s *PrometheusStorage) CleanupOldInstances(maxAge time.Duration) {
|
||||||
|
s.mu.Lock()
|
||||||
|
defer s.mu.Unlock()
|
||||||
|
|
||||||
|
cutoff := time.Now().Add(-maxAge)
|
||||||
|
for instanceID, instance := range s.instances {
|
||||||
|
if instance.ReceivedAt.Before(cutoff) {
|
||||||
|
delete(s.instances, instanceID)
|
||||||
|
|
||||||
|
// Remove from Prometheus metrics
|
||||||
|
labels := prometheus.Labels{
|
||||||
|
"cluster_id": instance.TelemetryData.ClusterId,
|
||||||
|
"version": instance.TelemetryData.Version,
|
||||||
|
"os": instance.TelemetryData.Os,
|
||||||
|
"deployment": instance.TelemetryData.Deployment,
|
||||||
|
}
|
||||||
|
s.volumeServerCount.Delete(labels)
|
||||||
|
s.totalDiskBytes.Delete(labels)
|
||||||
|
s.totalVolumeCount.Delete(labels)
|
||||||
|
s.filerCount.Delete(labels)
|
||||||
|
s.brokerCount.Delete(labels)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
s.updateStats()
|
||||||
|
}
|
||||||
315
telemetry/test/integration.go
Normal file
315
telemetry/test/integration.go
Normal file
@@ -0,0 +1,315 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"log"
|
||||||
|
"net/http"
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"syscall"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/seaweedfs/seaweedfs/telemetry/proto"
|
||||||
|
"github.com/seaweedfs/seaweedfs/weed/telemetry"
|
||||||
|
protobuf "google.golang.org/protobuf/proto"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
serverPort = "18080" // Use different port to avoid conflicts
|
||||||
|
serverURL = "http://localhost:" + serverPort
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
fmt.Println("🧪 Starting SeaweedFS Telemetry Integration Test")
|
||||||
|
|
||||||
|
// Start telemetry server
|
||||||
|
fmt.Println("📡 Starting telemetry server...")
|
||||||
|
serverCmd, err := startTelemetryServer()
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("❌ Failed to start telemetry server: %v", err)
|
||||||
|
}
|
||||||
|
defer stopServer(serverCmd)
|
||||||
|
|
||||||
|
// Wait for server to start
|
||||||
|
if !waitForServer(serverURL+"/health", 15*time.Second) {
|
||||||
|
log.Fatal("❌ Telemetry server failed to start")
|
||||||
|
}
|
||||||
|
fmt.Println("✅ Telemetry server started successfully")
|
||||||
|
|
||||||
|
// Test protobuf marshaling first
|
||||||
|
fmt.Println("🔧 Testing protobuf marshaling...")
|
||||||
|
if err := testProtobufMarshaling(); err != nil {
|
||||||
|
log.Fatalf("❌ Protobuf marshaling test failed: %v", err)
|
||||||
|
}
|
||||||
|
fmt.Println("✅ Protobuf marshaling test passed")
|
||||||
|
|
||||||
|
// Test protobuf client
|
||||||
|
fmt.Println("🔄 Testing protobuf telemetry client...")
|
||||||
|
if err := testTelemetryClient(); err != nil {
|
||||||
|
log.Fatalf("❌ Telemetry client test failed: %v", err)
|
||||||
|
}
|
||||||
|
fmt.Println("✅ Telemetry client test passed")
|
||||||
|
|
||||||
|
// Test server metrics endpoint
|
||||||
|
fmt.Println("📊 Testing Prometheus metrics endpoint...")
|
||||||
|
if err := testMetricsEndpoint(); err != nil {
|
||||||
|
log.Fatalf("❌ Metrics endpoint test failed: %v", err)
|
||||||
|
}
|
||||||
|
fmt.Println("✅ Metrics endpoint test passed")
|
||||||
|
|
||||||
|
// Test stats API
|
||||||
|
fmt.Println("📈 Testing stats API...")
|
||||||
|
if err := testStatsAPI(); err != nil {
|
||||||
|
log.Fatalf("❌ Stats API test failed: %v", err)
|
||||||
|
}
|
||||||
|
fmt.Println("✅ Stats API test passed")
|
||||||
|
|
||||||
|
// Test instances API
|
||||||
|
fmt.Println("📋 Testing instances API...")
|
||||||
|
if err := testInstancesAPI(); err != nil {
|
||||||
|
log.Fatalf("❌ Instances API test failed: %v", err)
|
||||||
|
}
|
||||||
|
fmt.Println("✅ Instances API test passed")
|
||||||
|
|
||||||
|
fmt.Println("🎉 All telemetry integration tests passed!")
|
||||||
|
}
|
||||||
|
|
||||||
|
func startTelemetryServer() (*exec.Cmd, error) {
|
||||||
|
// Get the directory where this test is running
|
||||||
|
testDir, err := os.Getwd()
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to get working directory: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Navigate to the server directory (from main seaweedfs directory)
|
||||||
|
serverDir := filepath.Join(testDir, "telemetry", "server")
|
||||||
|
|
||||||
|
cmd := exec.Command("go", "run", ".",
|
||||||
|
"-port="+serverPort,
|
||||||
|
"-dashboard=false",
|
||||||
|
"-cleanup=1m",
|
||||||
|
"-max-age=1h")
|
||||||
|
|
||||||
|
cmd.Dir = serverDir
|
||||||
|
|
||||||
|
// Create log files for server output
|
||||||
|
logFile, err := os.Create("telemetry-server-test.log")
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to create log file: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
cmd.Stdout = logFile
|
||||||
|
cmd.Stderr = logFile
|
||||||
|
|
||||||
|
if err := cmd.Start(); err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to start server: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return cmd, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func stopServer(cmd *exec.Cmd) {
|
||||||
|
if cmd != nil && cmd.Process != nil {
|
||||||
|
cmd.Process.Signal(syscall.SIGTERM)
|
||||||
|
cmd.Wait()
|
||||||
|
|
||||||
|
// Clean up log file
|
||||||
|
os.Remove("telemetry-server-test.log")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func waitForServer(url string, timeout time.Duration) bool {
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), timeout)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
fmt.Printf("⏳ Waiting for server at %s...\n", url)
|
||||||
|
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return false
|
||||||
|
default:
|
||||||
|
resp, err := http.Get(url)
|
||||||
|
if err == nil {
|
||||||
|
resp.Body.Close()
|
||||||
|
if resp.StatusCode == http.StatusOK {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
time.Sleep(500 * time.Millisecond)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func testProtobufMarshaling() error {
|
||||||
|
// Test protobuf marshaling/unmarshaling
|
||||||
|
testData := &proto.TelemetryData{
|
||||||
|
ClusterId: "test-cluster-12345",
|
||||||
|
Version: "test-3.45",
|
||||||
|
Os: "linux/amd64",
|
||||||
|
Features: []string{"filer", "s3api"},
|
||||||
|
Deployment: "test",
|
||||||
|
VolumeServerCount: 2,
|
||||||
|
TotalDiskBytes: 1000000,
|
||||||
|
TotalVolumeCount: 10,
|
||||||
|
FilerCount: 1,
|
||||||
|
BrokerCount: 1,
|
||||||
|
Timestamp: time.Now().Unix(),
|
||||||
|
}
|
||||||
|
|
||||||
|
// Marshal
|
||||||
|
data, err := protobuf.Marshal(testData)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to marshal protobuf: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Printf(" Protobuf size: %d bytes\n", len(data))
|
||||||
|
|
||||||
|
// Unmarshal
|
||||||
|
testData2 := &proto.TelemetryData{}
|
||||||
|
if err := protobuf.Unmarshal(data, testData2); err != nil {
|
||||||
|
return fmt.Errorf("failed to unmarshal protobuf: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify data
|
||||||
|
if testData2.ClusterId != testData.ClusterId {
|
||||||
|
return fmt.Errorf("protobuf data mismatch: expected %s, got %s",
|
||||||
|
testData.ClusterId, testData2.ClusterId)
|
||||||
|
}
|
||||||
|
|
||||||
|
if testData2.VolumeServerCount != testData.VolumeServerCount {
|
||||||
|
return fmt.Errorf("volume server count mismatch: expected %d, got %d",
|
||||||
|
testData.VolumeServerCount, testData2.VolumeServerCount)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func testTelemetryClient() error {
|
||||||
|
// Create telemetry client
|
||||||
|
client := telemetry.NewClient(serverURL+"/api/collect", true)
|
||||||
|
|
||||||
|
// Create test data using protobuf format
|
||||||
|
testData := &proto.TelemetryData{
|
||||||
|
Version: "test-3.45",
|
||||||
|
Os: "linux/amd64",
|
||||||
|
Features: []string{"filer", "s3api", "mq"},
|
||||||
|
Deployment: "integration-test",
|
||||||
|
VolumeServerCount: 3,
|
||||||
|
TotalDiskBytes: 1073741824, // 1GB
|
||||||
|
TotalVolumeCount: 50,
|
||||||
|
FilerCount: 2,
|
||||||
|
BrokerCount: 1,
|
||||||
|
Timestamp: time.Now().Unix(),
|
||||||
|
}
|
||||||
|
|
||||||
|
// Send telemetry data
|
||||||
|
if err := client.SendTelemetry(testData); err != nil {
|
||||||
|
return fmt.Errorf("failed to send telemetry: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Printf(" Sent telemetry for cluster: %s\n", client.GetInstanceID())
|
||||||
|
|
||||||
|
// Wait a bit for processing
|
||||||
|
time.Sleep(2 * time.Second)
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func testMetricsEndpoint() error {
|
||||||
|
resp, err := http.Get(serverURL + "/metrics")
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to get metrics: %v", err)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
return fmt.Errorf("metrics endpoint returned status %d", resp.StatusCode)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read response and check for expected metrics
|
||||||
|
content, err := io.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to read metrics response: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
contentStr := string(content)
|
||||||
|
expectedMetrics := []string{
|
||||||
|
"seaweedfs_telemetry_total_clusters",
|
||||||
|
"seaweedfs_telemetry_active_clusters",
|
||||||
|
"seaweedfs_telemetry_reports_received_total",
|
||||||
|
"seaweedfs_telemetry_volume_servers",
|
||||||
|
"seaweedfs_telemetry_disk_bytes",
|
||||||
|
"seaweedfs_telemetry_volume_count",
|
||||||
|
"seaweedfs_telemetry_filer_count",
|
||||||
|
"seaweedfs_telemetry_broker_count",
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, metric := range expectedMetrics {
|
||||||
|
if !strings.Contains(contentStr, metric) {
|
||||||
|
return fmt.Errorf("missing expected metric: %s", metric)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check that we have at least one report received
|
||||||
|
if !strings.Contains(contentStr, "seaweedfs_telemetry_reports_received_total 1") {
|
||||||
|
fmt.Printf(" Warning: Expected at least 1 report received, metrics content:\n%s\n", contentStr)
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Printf(" Found %d expected metrics\n", len(expectedMetrics))
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func testStatsAPI() error {
|
||||||
|
resp, err := http.Get(serverURL + "/api/stats")
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to get stats: %v", err)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
return fmt.Errorf("stats API returned status %d", resp.StatusCode)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read and verify JSON response
|
||||||
|
content, err := io.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to read stats response: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
contentStr := string(content)
|
||||||
|
if !strings.Contains(contentStr, "total_instances") {
|
||||||
|
return fmt.Errorf("stats response missing total_instances field")
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Printf(" Stats response: %s\n", contentStr)
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func testInstancesAPI() error {
|
||||||
|
resp, err := http.Get(serverURL + "/api/instances?limit=10")
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to get instances: %v", err)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
return fmt.Errorf("instances API returned status %d", resp.StatusCode)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read response
|
||||||
|
content, err := io.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to read instances response: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Printf(" Instances response length: %d bytes\n", len(content))
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
@@ -61,6 +61,8 @@ type MasterOptions struct {
|
|||||||
electionTimeout *time.Duration
|
electionTimeout *time.Duration
|
||||||
raftHashicorp *bool
|
raftHashicorp *bool
|
||||||
raftBootstrap *bool
|
raftBootstrap *bool
|
||||||
|
telemetryUrl *string
|
||||||
|
telemetryEnabled *bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
@@ -88,6 +90,8 @@ func init() {
|
|||||||
m.electionTimeout = cmdMaster.Flag.Duration("electionTimeout", 10*time.Second, "election timeout of master servers")
|
m.electionTimeout = cmdMaster.Flag.Duration("electionTimeout", 10*time.Second, "election timeout of master servers")
|
||||||
m.raftHashicorp = cmdMaster.Flag.Bool("raftHashicorp", false, "use hashicorp raft")
|
m.raftHashicorp = cmdMaster.Flag.Bool("raftHashicorp", false, "use hashicorp raft")
|
||||||
m.raftBootstrap = cmdMaster.Flag.Bool("raftBootstrap", false, "Whether to bootstrap the Raft cluster")
|
m.raftBootstrap = cmdMaster.Flag.Bool("raftBootstrap", false, "Whether to bootstrap the Raft cluster")
|
||||||
|
m.telemetryUrl = cmdMaster.Flag.String("telemetry.url", "https://telemetry.seaweedfs.com:3091/api/collect", "telemetry server URL to send usage statistics")
|
||||||
|
m.telemetryEnabled = cmdMaster.Flag.Bool("telemetry", false, "enable telemetry reporting")
|
||||||
}
|
}
|
||||||
|
|
||||||
var cmdMaster = &Command{
|
var cmdMaster = &Command{
|
||||||
@@ -332,5 +336,7 @@ func (m *MasterOptions) toMasterOption(whiteList []string) *weed_server.MasterOp
|
|||||||
DisableHttp: *m.disableHttp,
|
DisableHttp: *m.disableHttp,
|
||||||
MetricsAddress: *m.metricsAddress,
|
MetricsAddress: *m.metricsAddress,
|
||||||
MetricsIntervalSec: *m.metricsIntervalSec,
|
MetricsIntervalSec: *m.metricsIntervalSec,
|
||||||
|
TelemetryUrl: *m.telemetryUrl,
|
||||||
|
TelemetryEnabled: *m.telemetryEnabled,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -104,6 +104,8 @@ func init() {
|
|||||||
masterOptions.raftBootstrap = cmdServer.Flag.Bool("master.raftBootstrap", false, "Whether to bootstrap the Raft cluster")
|
masterOptions.raftBootstrap = cmdServer.Flag.Bool("master.raftBootstrap", false, "Whether to bootstrap the Raft cluster")
|
||||||
masterOptions.heartbeatInterval = cmdServer.Flag.Duration("master.heartbeatInterval", 300*time.Millisecond, "heartbeat interval of master servers, and will be randomly multiplied by [1, 1.25)")
|
masterOptions.heartbeatInterval = cmdServer.Flag.Duration("master.heartbeatInterval", 300*time.Millisecond, "heartbeat interval of master servers, and will be randomly multiplied by [1, 1.25)")
|
||||||
masterOptions.electionTimeout = cmdServer.Flag.Duration("master.electionTimeout", 10*time.Second, "election timeout of master servers")
|
masterOptions.electionTimeout = cmdServer.Flag.Duration("master.electionTimeout", 10*time.Second, "election timeout of master servers")
|
||||||
|
masterOptions.telemetryUrl = cmdServer.Flag.String("master.telemetry.url", "https://telemetry.seaweedfs.com:3091/api/collect", "telemetry server URL to send usage statistics")
|
||||||
|
masterOptions.telemetryEnabled = cmdServer.Flag.Bool("master.telemetry", false, "enable telemetry reporting")
|
||||||
|
|
||||||
filerOptions.filerGroup = cmdServer.Flag.String("filer.filerGroup", "", "share metadata with other filers in the same filerGroup")
|
filerOptions.filerGroup = cmdServer.Flag.String("filer.filerGroup", "", "share metadata with other filers in the same filerGroup")
|
||||||
filerOptions.collection = cmdServer.Flag.String("filer.collection", "", "all data will be stored in this collection")
|
filerOptions.collection = cmdServer.Flag.String("filer.collection", "", "all data will be stored in this collection")
|
||||||
|
|||||||
@@ -8,11 +8,13 @@ import (
|
|||||||
"net/url"
|
"net/url"
|
||||||
"os"
|
"os"
|
||||||
"regexp"
|
"regexp"
|
||||||
|
"runtime"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/seaweedfs/seaweedfs/weed/stats"
|
"github.com/seaweedfs/seaweedfs/weed/stats"
|
||||||
|
"github.com/seaweedfs/seaweedfs/weed/telemetry"
|
||||||
|
|
||||||
"github.com/seaweedfs/seaweedfs/weed/cluster"
|
"github.com/seaweedfs/seaweedfs/weed/cluster"
|
||||||
"github.com/seaweedfs/seaweedfs/weed/pb"
|
"github.com/seaweedfs/seaweedfs/weed/pb"
|
||||||
@@ -30,6 +32,7 @@ import (
|
|||||||
"github.com/seaweedfs/seaweedfs/weed/topology"
|
"github.com/seaweedfs/seaweedfs/weed/topology"
|
||||||
"github.com/seaweedfs/seaweedfs/weed/util"
|
"github.com/seaweedfs/seaweedfs/weed/util"
|
||||||
util_http "github.com/seaweedfs/seaweedfs/weed/util/http"
|
util_http "github.com/seaweedfs/seaweedfs/weed/util/http"
|
||||||
|
"github.com/seaweedfs/seaweedfs/weed/util/version"
|
||||||
"github.com/seaweedfs/seaweedfs/weed/wdclient"
|
"github.com/seaweedfs/seaweedfs/weed/wdclient"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -52,6 +55,8 @@ type MasterOption struct {
|
|||||||
MetricsAddress string
|
MetricsAddress string
|
||||||
MetricsIntervalSec int
|
MetricsIntervalSec int
|
||||||
IsFollower bool
|
IsFollower bool
|
||||||
|
TelemetryUrl string
|
||||||
|
TelemetryEnabled bool
|
||||||
}
|
}
|
||||||
|
|
||||||
type MasterServer struct {
|
type MasterServer struct {
|
||||||
@@ -76,6 +81,9 @@ type MasterServer struct {
|
|||||||
adminLocks *AdminLocks
|
adminLocks *AdminLocks
|
||||||
|
|
||||||
Cluster *cluster.Cluster
|
Cluster *cluster.Cluster
|
||||||
|
|
||||||
|
// telemetry
|
||||||
|
telemetryCollector *telemetry.Collector
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewMasterServer(r *mux.Router, option *MasterOption, peers map[string]pb.ServerAddress) *MasterServer {
|
func NewMasterServer(r *mux.Router, option *MasterOption, peers map[string]pb.ServerAddress) *MasterServer {
|
||||||
@@ -131,6 +139,28 @@ func NewMasterServer(r *mux.Router, option *MasterOption, peers map[string]pb.Se
|
|||||||
ms.vg = topology.NewDefaultVolumeGrowth()
|
ms.vg = topology.NewDefaultVolumeGrowth()
|
||||||
glog.V(0).Infoln("Volume Size Limit is", ms.option.VolumeSizeLimitMB, "MB")
|
glog.V(0).Infoln("Volume Size Limit is", ms.option.VolumeSizeLimitMB, "MB")
|
||||||
|
|
||||||
|
// Initialize telemetry after topology is created
|
||||||
|
if option.TelemetryEnabled && option.TelemetryUrl != "" {
|
||||||
|
telemetryClient := telemetry.NewClient(option.TelemetryUrl, option.TelemetryEnabled)
|
||||||
|
ms.telemetryCollector = telemetry.NewCollector(telemetryClient, ms.Topo, ms.Cluster)
|
||||||
|
ms.telemetryCollector.SetMasterServer(ms)
|
||||||
|
|
||||||
|
// Set version and OS information
|
||||||
|
ms.telemetryCollector.SetVersion(version.VERSION_NUMBER)
|
||||||
|
ms.telemetryCollector.SetOS(runtime.GOOS + "/" + runtime.GOARCH)
|
||||||
|
|
||||||
|
// Determine features and deployment type
|
||||||
|
features := []string{"master"}
|
||||||
|
if len(peers) > 1 {
|
||||||
|
features = append(features, "cluster")
|
||||||
|
}
|
||||||
|
ms.telemetryCollector.SetFeatures(features)
|
||||||
|
ms.telemetryCollector.SetDeployment(telemetry.DetermineDeployment(true, false, len(peers)))
|
||||||
|
|
||||||
|
// Start periodic telemetry collection (every 24 hours)
|
||||||
|
ms.telemetryCollector.StartPeriodicCollection(24 * time.Hour)
|
||||||
|
}
|
||||||
|
|
||||||
ms.guard = security.NewGuard(append(ms.option.WhiteList, whiteList...), signingKey, expiresAfterSec, readSigningKey, readExpiresAfterSec)
|
ms.guard = security.NewGuard(append(ms.option.WhiteList, whiteList...), signingKey, expiresAfterSec, readSigningKey, readExpiresAfterSec)
|
||||||
|
|
||||||
handleStaticResources2(r)
|
handleStaticResources2(r)
|
||||||
|
|||||||
100
weed/telemetry/client.go
Normal file
100
weed/telemetry/client.go
Normal file
@@ -0,0 +1,100 @@
|
|||||||
|
package telemetry
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/google/uuid"
|
||||||
|
"github.com/seaweedfs/seaweedfs/telemetry/proto"
|
||||||
|
"github.com/seaweedfs/seaweedfs/weed/glog"
|
||||||
|
protobuf "google.golang.org/protobuf/proto"
|
||||||
|
)
|
||||||
|
|
||||||
|
type Client struct {
|
||||||
|
url string
|
||||||
|
enabled bool
|
||||||
|
instanceID string
|
||||||
|
httpClient *http.Client
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewClient creates a new telemetry client
|
||||||
|
func NewClient(url string, enabled bool) *Client {
|
||||||
|
return &Client{
|
||||||
|
url: url,
|
||||||
|
enabled: enabled,
|
||||||
|
instanceID: uuid.New().String(), // Generate UUID in memory only
|
||||||
|
httpClient: &http.Client{
|
||||||
|
Timeout: 10 * time.Second,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsEnabled returns whether telemetry is enabled
|
||||||
|
func (c *Client) IsEnabled() bool {
|
||||||
|
return c.enabled && c.url != ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// SendTelemetry sends telemetry data synchronously using protobuf format
|
||||||
|
func (c *Client) SendTelemetry(data *proto.TelemetryData) error {
|
||||||
|
if !c.IsEnabled() {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set the cluster ID
|
||||||
|
data.ClusterId = c.instanceID
|
||||||
|
|
||||||
|
return c.sendProtobuf(data)
|
||||||
|
}
|
||||||
|
|
||||||
|
// SendTelemetryAsync sends telemetry data asynchronously
|
||||||
|
func (c *Client) SendTelemetryAsync(data *proto.TelemetryData) {
|
||||||
|
if !c.IsEnabled() {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
if err := c.SendTelemetry(data); err != nil {
|
||||||
|
glog.V(1).Infof("Failed to send telemetry: %v", err)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
|
// sendProtobuf sends data using protobuf format
|
||||||
|
func (c *Client) sendProtobuf(data *proto.TelemetryData) error {
|
||||||
|
req := &proto.TelemetryRequest{
|
||||||
|
Data: data,
|
||||||
|
}
|
||||||
|
|
||||||
|
body, err := protobuf.Marshal(req)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to marshal protobuf: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
httpReq, err := http.NewRequest("POST", c.url, bytes.NewBuffer(body))
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to create request: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
httpReq.Header.Set("Content-Type", "application/x-protobuf")
|
||||||
|
httpReq.Header.Set("User-Agent", fmt.Sprintf("SeaweedFS/%s", data.Version))
|
||||||
|
|
||||||
|
resp, err := c.httpClient.Do(httpReq)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to send request: %v", err)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
return fmt.Errorf("server returned status %d", resp.StatusCode)
|
||||||
|
}
|
||||||
|
|
||||||
|
glog.V(2).Infof("Telemetry sent successfully via protobuf")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetInstanceID returns the current instance ID
|
||||||
|
func (c *Client) GetInstanceID() string {
|
||||||
|
return c.instanceID
|
||||||
|
}
|
||||||
218
weed/telemetry/collector.go
Normal file
218
weed/telemetry/collector.go
Normal file
@@ -0,0 +1,218 @@
|
|||||||
|
package telemetry
|
||||||
|
|
||||||
|
import (
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/seaweedfs/seaweedfs/telemetry/proto"
|
||||||
|
"github.com/seaweedfs/seaweedfs/weed/cluster"
|
||||||
|
"github.com/seaweedfs/seaweedfs/weed/glog"
|
||||||
|
"github.com/seaweedfs/seaweedfs/weed/topology"
|
||||||
|
)
|
||||||
|
|
||||||
|
type Collector struct {
|
||||||
|
client *Client
|
||||||
|
topo *topology.Topology
|
||||||
|
cluster *cluster.Cluster
|
||||||
|
masterServer interface{} // Will be set to *weed_server.MasterServer to access client tracking
|
||||||
|
features []string
|
||||||
|
deployment string
|
||||||
|
version string
|
||||||
|
os string
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewCollector creates a new telemetry collector
|
||||||
|
func NewCollector(client *Client, topo *topology.Topology, cluster *cluster.Cluster) *Collector {
|
||||||
|
return &Collector{
|
||||||
|
client: client,
|
||||||
|
topo: topo,
|
||||||
|
cluster: cluster,
|
||||||
|
masterServer: nil,
|
||||||
|
features: []string{},
|
||||||
|
deployment: "unknown",
|
||||||
|
version: "unknown",
|
||||||
|
os: "unknown",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetFeatures sets the list of enabled features
|
||||||
|
func (c *Collector) SetFeatures(features []string) {
|
||||||
|
c.features = features
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetDeployment sets the deployment type (standalone, cluster, etc.)
|
||||||
|
func (c *Collector) SetDeployment(deployment string) {
|
||||||
|
c.deployment = deployment
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetVersion sets the SeaweedFS version
|
||||||
|
func (c *Collector) SetVersion(version string) {
|
||||||
|
c.version = version
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetOS sets the operating system information
|
||||||
|
func (c *Collector) SetOS(os string) {
|
||||||
|
c.os = os
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetMasterServer sets a reference to the master server for client tracking
|
||||||
|
func (c *Collector) SetMasterServer(masterServer interface{}) {
|
||||||
|
c.masterServer = masterServer
|
||||||
|
}
|
||||||
|
|
||||||
|
// CollectAndSendAsync collects telemetry data and sends it asynchronously
|
||||||
|
func (c *Collector) CollectAndSendAsync() {
|
||||||
|
if !c.client.IsEnabled() {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
data := c.collectData()
|
||||||
|
c.client.SendTelemetryAsync(data)
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
|
// StartPeriodicCollection starts sending telemetry data periodically
|
||||||
|
func (c *Collector) StartPeriodicCollection(interval time.Duration) {
|
||||||
|
if !c.client.IsEnabled() {
|
||||||
|
glog.V(1).Infof("Telemetry is disabled, skipping periodic collection")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
glog.V(0).Infof("Starting telemetry collection every %v", interval)
|
||||||
|
|
||||||
|
// Send initial telemetry after a short delay
|
||||||
|
go func() {
|
||||||
|
time.Sleep(30 * time.Second) // Wait for cluster to stabilize
|
||||||
|
c.CollectAndSendAsync()
|
||||||
|
}()
|
||||||
|
|
||||||
|
// Start periodic collection
|
||||||
|
ticker := time.NewTicker(interval)
|
||||||
|
go func() {
|
||||||
|
defer ticker.Stop()
|
||||||
|
for range ticker.C {
|
||||||
|
c.CollectAndSendAsync()
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
|
// collectData gathers telemetry data from the topology
|
||||||
|
func (c *Collector) collectData() *proto.TelemetryData {
|
||||||
|
data := &proto.TelemetryData{
|
||||||
|
Version: c.version,
|
||||||
|
Os: c.os,
|
||||||
|
Features: c.features,
|
||||||
|
Deployment: c.deployment,
|
||||||
|
Timestamp: time.Now().Unix(),
|
||||||
|
}
|
||||||
|
|
||||||
|
if c.topo != nil {
|
||||||
|
// Collect volume server count
|
||||||
|
data.VolumeServerCount = int32(c.countVolumeServers())
|
||||||
|
|
||||||
|
// Collect total disk usage and volume count
|
||||||
|
diskBytes, volumeCount := c.collectVolumeStats()
|
||||||
|
data.TotalDiskBytes = diskBytes
|
||||||
|
data.TotalVolumeCount = int32(volumeCount)
|
||||||
|
}
|
||||||
|
|
||||||
|
if c.cluster != nil {
|
||||||
|
// Collect filer and broker counts
|
||||||
|
data.FilerCount = int32(c.countFilers())
|
||||||
|
data.BrokerCount = int32(c.countBrokers())
|
||||||
|
}
|
||||||
|
|
||||||
|
return data
|
||||||
|
}
|
||||||
|
|
||||||
|
// countVolumeServers counts the number of active volume servers
|
||||||
|
func (c *Collector) countVolumeServers() int {
|
||||||
|
count := 0
|
||||||
|
for _, dcNode := range c.topo.Children() {
|
||||||
|
dc := dcNode.(*topology.DataCenter)
|
||||||
|
for _, rackNode := range dc.Children() {
|
||||||
|
rack := rackNode.(*topology.Rack)
|
||||||
|
for range rack.Children() {
|
||||||
|
count++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return count
|
||||||
|
}
|
||||||
|
|
||||||
|
// collectVolumeStats collects total disk usage and volume count
|
||||||
|
func (c *Collector) collectVolumeStats() (uint64, int) {
|
||||||
|
var totalDiskBytes uint64
|
||||||
|
var totalVolumeCount int
|
||||||
|
|
||||||
|
for _, dcNode := range c.topo.Children() {
|
||||||
|
dc := dcNode.(*topology.DataCenter)
|
||||||
|
for _, rackNode := range dc.Children() {
|
||||||
|
rack := rackNode.(*topology.Rack)
|
||||||
|
for _, dnNode := range rack.Children() {
|
||||||
|
dn := dnNode.(*topology.DataNode)
|
||||||
|
volumes := dn.GetVolumes()
|
||||||
|
for _, volumeInfo := range volumes {
|
||||||
|
totalVolumeCount++
|
||||||
|
totalDiskBytes += volumeInfo.Size
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return totalDiskBytes, totalVolumeCount
|
||||||
|
}
|
||||||
|
|
||||||
|
// countFilers counts the number of active filer servers across all groups
|
||||||
|
func (c *Collector) countFilers() int {
|
||||||
|
// Count all filer-type nodes in the cluster
|
||||||
|
// This includes both pure filer servers and S3 servers (which register as filers)
|
||||||
|
count := 0
|
||||||
|
for _, groupName := range c.getAllFilerGroups() {
|
||||||
|
nodes := c.cluster.ListClusterNode(cluster.FilerGroupName(groupName), cluster.FilerType)
|
||||||
|
count += len(nodes)
|
||||||
|
}
|
||||||
|
return count
|
||||||
|
}
|
||||||
|
|
||||||
|
// countBrokers counts the number of active broker servers
|
||||||
|
func (c *Collector) countBrokers() int {
|
||||||
|
// Count brokers across all broker groups
|
||||||
|
count := 0
|
||||||
|
for _, groupName := range c.getAllBrokerGroups() {
|
||||||
|
nodes := c.cluster.ListClusterNode(cluster.FilerGroupName(groupName), cluster.BrokerType)
|
||||||
|
count += len(nodes)
|
||||||
|
}
|
||||||
|
return count
|
||||||
|
}
|
||||||
|
|
||||||
|
// getAllFilerGroups returns all filer group names
|
||||||
|
func (c *Collector) getAllFilerGroups() []string {
|
||||||
|
// For simplicity, we check the default group
|
||||||
|
// In a more sophisticated implementation, we could enumerate all groups
|
||||||
|
return []string{""}
|
||||||
|
}
|
||||||
|
|
||||||
|
// getAllBrokerGroups returns all broker group names
|
||||||
|
func (c *Collector) getAllBrokerGroups() []string {
|
||||||
|
// For simplicity, we check the default group
|
||||||
|
// In a more sophisticated implementation, we could enumerate all groups
|
||||||
|
return []string{""}
|
||||||
|
}
|
||||||
|
|
||||||
|
// DetermineDeployment determines the deployment type based on configuration
|
||||||
|
func DetermineDeployment(isMasterEnabled, isVolumeEnabled bool, peerCount int) string {
|
||||||
|
if isMasterEnabled && isVolumeEnabled {
|
||||||
|
if peerCount > 1 {
|
||||||
|
return "cluster"
|
||||||
|
}
|
||||||
|
return "standalone"
|
||||||
|
}
|
||||||
|
if isMasterEnabled {
|
||||||
|
return "master-only"
|
||||||
|
}
|
||||||
|
if isVolumeEnabled {
|
||||||
|
return "volume-only"
|
||||||
|
}
|
||||||
|
return "unknown"
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user