You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
323 lines
7.0 KiB
Go
323 lines
7.0 KiB
Go
package middleware
|
|
|
|
import (
|
|
"fmt"
|
|
"net/http"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/gofiber/fiber/v3"
|
|
|
|
"github.com/knowfoolery/backend/services/gateway-service/config"
|
|
)
|
|
|
|
type HealthChecker struct {
|
|
services map[string]*ServiceHealth
|
|
config config.ServicesConfig
|
|
client *http.Client
|
|
mutex sync.RWMutex
|
|
lastCheck time.Time
|
|
checkInterval time.Duration
|
|
}
|
|
|
|
type ServiceHealth struct {
|
|
Name string
|
|
URL string
|
|
Status ServiceStatus
|
|
LastCheck time.Time
|
|
LastError string
|
|
ResponseTime time.Duration
|
|
Uptime time.Duration
|
|
DownSince time.Time
|
|
CheckCount int64
|
|
FailCount int64
|
|
}
|
|
|
|
// ServiceStatus is the health state recorded for a backend service.
type ServiceStatus string

// Health states a service can be in.
const (
	// ServiceStatusHealthy means the last probe returned HTTP 200.
	ServiceStatusHealthy = ServiceStatus("healthy")
	// ServiceStatusUnhealthy means the last probe failed or returned a
	// status other than HTTP 200.
	ServiceStatusUnhealthy = ServiceStatus("unhealthy")
	// ServiceStatusUnknown is the initial state before any probe has run.
	ServiceStatusUnknown = ServiceStatus("unknown")
)
|
|
|
|
func NewHealthChecker(cfg config.ServicesConfig) *HealthChecker {
|
|
services := make(map[string]*ServiceHealth)
|
|
|
|
services["game-service"] = &ServiceHealth{
|
|
Name: "game-service",
|
|
URL: cfg.GameService.URL,
|
|
Status: ServiceStatusUnknown,
|
|
}
|
|
services["question-service"] = &ServiceHealth{
|
|
Name: "question-service",
|
|
URL: cfg.QuestionService.URL,
|
|
Status: ServiceStatusUnknown,
|
|
}
|
|
services["user-service"] = &ServiceHealth{
|
|
Name: "user-service",
|
|
URL: cfg.UserService.URL,
|
|
Status: ServiceStatusUnknown,
|
|
}
|
|
services["leaderboard-service"] = &ServiceHealth{
|
|
Name: "leaderboard-service",
|
|
URL: cfg.LeaderboardService.URL,
|
|
Status: ServiceStatusUnknown,
|
|
}
|
|
services["session-service"] = &ServiceHealth{
|
|
Name: "session-service",
|
|
URL: cfg.SessionService.URL,
|
|
Status: ServiceStatusUnknown,
|
|
}
|
|
services["admin-service"] = &ServiceHealth{
|
|
Name: "admin-service",
|
|
URL: cfg.AdminService.URL,
|
|
Status: ServiceStatusUnknown,
|
|
}
|
|
|
|
hc := &HealthChecker{
|
|
services: services,
|
|
config: cfg,
|
|
client: &http.Client{Timeout: 5 * time.Second},
|
|
checkInterval: cfg.HealthCheckInterval,
|
|
}
|
|
|
|
go hc.startHealthChecks()
|
|
|
|
return hc
|
|
}
|
|
|
|
func (hc *HealthChecker) Handler() fiber.Handler {
|
|
return func(c fiber.Ctx) error {
|
|
path := c.Path()
|
|
|
|
serviceName := hc.getServiceNameFromPath(path)
|
|
if serviceName == "" {
|
|
return c.Next()
|
|
}
|
|
|
|
hc.mutex.RLock()
|
|
service, exists := hc.services[serviceName]
|
|
hc.mutex.RUnlock()
|
|
|
|
if !exists {
|
|
return c.Next()
|
|
}
|
|
|
|
if service.Status == ServiceStatusUnhealthy {
|
|
return c.Status(fiber.StatusServiceUnavailable).JSON(fiber.Map{
|
|
"error": "Service temporarily unavailable",
|
|
"service": serviceName,
|
|
"status": service.Status,
|
|
"last_error": service.LastError,
|
|
})
|
|
}
|
|
|
|
c.Set("X-Service-Status", string(service.Status))
|
|
c.Set("X-Service-Response-Time", service.ResponseTime.String())
|
|
|
|
return c.Next()
|
|
}
|
|
}
|
|
|
|
func (hc *HealthChecker) startHealthChecks() {
|
|
ticker := time.NewTicker(hc.checkInterval)
|
|
defer ticker.Stop()
|
|
|
|
for {
|
|
select {
|
|
case <-ticker.C:
|
|
hc.checkAllServices()
|
|
}
|
|
}
|
|
}
|
|
|
|
func (hc *HealthChecker) checkAllServices() {
|
|
var wg sync.WaitGroup
|
|
|
|
hc.mutex.RLock()
|
|
services := make([]*ServiceHealth, 0, len(hc.services))
|
|
for _, service := range hc.services {
|
|
services = append(services, service)
|
|
}
|
|
hc.mutex.RUnlock()
|
|
|
|
for _, service := range services {
|
|
wg.Add(1)
|
|
go func(s *ServiceHealth) {
|
|
defer wg.Done()
|
|
hc.checkService(s)
|
|
}(service)
|
|
}
|
|
|
|
wg.Wait()
|
|
hc.lastCheck = time.Now()
|
|
}
|
|
|
|
func (hc *HealthChecker) checkService(service *ServiceHealth) {
|
|
start := time.Now()
|
|
|
|
healthURL := service.URL + "/health"
|
|
|
|
resp, err := hc.client.Get(healthURL)
|
|
responseTime := time.Since(start)
|
|
|
|
hc.mutex.Lock()
|
|
defer hc.mutex.Unlock()
|
|
|
|
service.LastCheck = time.Now()
|
|
service.ResponseTime = responseTime
|
|
service.CheckCount++
|
|
|
|
if err != nil {
|
|
service.Status = ServiceStatusUnhealthy
|
|
service.LastError = err.Error()
|
|
service.FailCount++
|
|
|
|
if service.DownSince.IsZero() {
|
|
service.DownSince = time.Now()
|
|
}
|
|
} else {
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode == http.StatusOK {
|
|
previousStatus := service.Status
|
|
service.Status = ServiceStatusHealthy
|
|
service.LastError = ""
|
|
|
|
if previousStatus == ServiceStatusUnhealthy && !service.DownSince.IsZero() {
|
|
service.Uptime += time.Since(service.DownSince)
|
|
service.DownSince = time.Time{}
|
|
}
|
|
} else {
|
|
service.Status = ServiceStatusUnhealthy
|
|
service.LastError = fmt.Sprintf("HTTP %d", resp.StatusCode)
|
|
service.FailCount++
|
|
|
|
if service.DownSince.IsZero() {
|
|
service.DownSince = time.Now()
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
func (hc *HealthChecker) getServiceNameFromPath(path string) string {
|
|
if len(path) < 8 {
|
|
return ""
|
|
}
|
|
|
|
if path[:8] != "/api/v1/" {
|
|
return ""
|
|
}
|
|
|
|
pathParts := []string{}
|
|
current := ""
|
|
for i := 8; i < len(path); i++ {
|
|
if path[i] == '/' {
|
|
if current != "" {
|
|
pathParts = append(pathParts, current)
|
|
current = ""
|
|
}
|
|
} else {
|
|
current += string(path[i])
|
|
}
|
|
}
|
|
if current != "" {
|
|
pathParts = append(pathParts, current)
|
|
}
|
|
|
|
if len(pathParts) == 0 {
|
|
return ""
|
|
}
|
|
|
|
switch pathParts[0] {
|
|
case "game":
|
|
return "game-service"
|
|
case "questions":
|
|
return "question-service"
|
|
case "users":
|
|
return "user-service"
|
|
case "leaderboard":
|
|
return "leaderboard-service"
|
|
case "sessions":
|
|
return "session-service"
|
|
case "admin":
|
|
return "admin-service"
|
|
default:
|
|
return ""
|
|
}
|
|
}
|
|
|
|
func (hc *HealthChecker) GetHealthStatus() map[string]interface{} {
|
|
hc.mutex.RLock()
|
|
defer hc.mutex.RUnlock()
|
|
|
|
status := make(map[string]interface{})
|
|
overallHealthy := true
|
|
healthyCount := 0
|
|
totalCount := len(hc.services)
|
|
|
|
services := make(map[string]interface{})
|
|
|
|
for name, service := range hc.services {
|
|
serviceInfo := map[string]interface{}{
|
|
"status": service.Status,
|
|
"url": service.URL,
|
|
"last_check": service.LastCheck,
|
|
"response_time": service.ResponseTime.String(),
|
|
"check_count": service.CheckCount,
|
|
"fail_count": service.FailCount,
|
|
}
|
|
|
|
if service.Status != ServiceStatusHealthy {
|
|
overallHealthy = false
|
|
serviceInfo["last_error"] = service.LastError
|
|
if !service.DownSince.IsZero() {
|
|
serviceInfo["down_since"] = service.DownSince
|
|
serviceInfo["down_duration"] = time.Since(service.DownSince).String()
|
|
}
|
|
} else {
|
|
healthyCount++
|
|
if service.Uptime > 0 {
|
|
serviceInfo["uptime"] = service.Uptime.String()
|
|
}
|
|
}
|
|
|
|
if service.CheckCount > 0 {
|
|
serviceInfo["success_rate"] = fmt.Sprintf("%.2f%%",
|
|
float64(service.CheckCount-service.FailCount)/float64(service.CheckCount)*100)
|
|
}
|
|
|
|
services[name] = serviceInfo
|
|
}
|
|
|
|
status["overall_status"] = "healthy"
|
|
if !overallHealthy {
|
|
status["overall_status"] = "degraded"
|
|
}
|
|
|
|
status["services"] = services
|
|
status["summary"] = map[string]interface{}{
|
|
"total_services": totalCount,
|
|
"healthy_services": healthyCount,
|
|
"unhealthy_services": totalCount - healthyCount,
|
|
"last_check": hc.lastCheck,
|
|
}
|
|
|
|
return status
|
|
}
|
|
|
|
func (hc *HealthChecker) IsServiceHealthy(serviceName string) bool {
|
|
hc.mutex.RLock()
|
|
defer hc.mutex.RUnlock()
|
|
|
|
if service, exists := hc.services[serviceName]; exists {
|
|
return service.Status == ServiceStatusHealthy
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
func (hc *HealthChecker) ForceHealthCheck() {
|
|
go hc.checkAllServices()
|
|
} |