You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

323 lines
7.0 KiB
Go

package middleware
import (
"fmt"
"net/http"
"sync"
"time"
"github.com/gofiber/fiber/v3"
"github.com/knowfoolery/backend/services/gateway-service/config"
)
// HealthChecker tracks the health of the backend services behind the gateway.
// It keeps one ServiceHealth record per backend, refreshes those records by
// probing each backend's /health endpoint, and exposes the results through a
// Fiber middleware (Handler) and a status report (GetHealthStatus).
type HealthChecker struct {
	// services maps a registry key such as "game-service" to its record.
	services map[string]*ServiceHealth
	// config is the services configuration the checker was created from.
	config config.ServicesConfig
	// client is the HTTP client used for the health probes.
	client *http.Client
	// mutex is held while the services map is read and while checkService
	// updates a record.
	mutex sync.RWMutex
	// lastCheck is the time the most recent sweep over all services finished.
	lastCheck time.Time
	// checkInterval is the period between background sweeps.
	checkInterval time.Duration
}
// ServiceHealth is the health record kept for a single backend service.
type ServiceHealth struct {
	// Name is the registry key of the service, e.g. "game-service".
	Name string
	// URL is the base URL of the service; the probe appends "/health" to it.
	URL string
	// Status is the outcome of the most recent probe.
	Status ServiceStatus
	// LastCheck is when the most recent probe was recorded.
	LastCheck time.Time
	// LastError describes the most recent failure; it is cleared when a
	// probe succeeds.
	LastError string
	// ResponseTime is how long the most recent probe request took.
	ResponseTime time.Duration
	// Uptime is increased on recovery by the time elapsed since DownSince.
	Uptime time.Duration
	// DownSince is when the current outage was first observed; it is the
	// zero time while no outage is being tracked.
	DownSince time.Time
	// CheckCount is the total number of probes performed.
	CheckCount int64
	// FailCount is the number of probes that failed.
	FailCount int64
}
// ServiceStatus is the health state recorded for a backend service.
type ServiceStatus string

// Values a ServiceStatus can take.
const (
	// ServiceStatusHealthy is recorded when the last probe returned HTTP 200.
	ServiceStatusHealthy = ServiceStatus("healthy")
	// ServiceStatusUnhealthy is recorded when the last probe failed or
	// returned a status other than HTTP 200.
	ServiceStatusUnhealthy = ServiceStatus("unhealthy")
	// ServiceStatusUnknown is the status a service has before its first probe.
	ServiceStatusUnknown = ServiceStatus("unknown")
)
// NewHealthChecker builds a HealthChecker for the six backend services named
// in cfg and starts the background polling goroutine.
//
// Every service starts out as ServiceStatusUnknown. Probes use an HTTP client
// with a 5 second timeout and are scheduled every cfg.HealthCheckInterval.
func NewHealthChecker(cfg config.ServicesConfig) *HealthChecker {
	// Registry key and base URL of every backend tracked by the checker.
	backends := []struct {
		name string
		url  string
	}{
		{"game-service", cfg.GameService.URL},
		{"question-service", cfg.QuestionService.URL},
		{"user-service", cfg.UserService.URL},
		{"leaderboard-service", cfg.LeaderboardService.URL},
		{"session-service", cfg.SessionService.URL},
		{"admin-service", cfg.AdminService.URL},
	}

	registry := make(map[string]*ServiceHealth)
	for _, b := range backends {
		registry[b.name] = &ServiceHealth{
			Name:   b.name,
			URL:    b.url,
			Status: ServiceStatusUnknown,
		}
	}

	checker := &HealthChecker{
		services:      registry,
		config:        cfg,
		client:        &http.Client{Timeout: 5 * time.Second},
		checkInterval: cfg.HealthCheckInterval,
	}

	// Polling runs for the lifetime of the process; see startHealthChecks.
	go checker.startHealthChecks()

	return checker
}
// Handler returns a Fiber middleware that rejects requests addressed to a
// backend whose last recorded status is unhealthy.
//
// The target backend is derived from the request path with
// getServiceNameFromPath. Requests that do not map to a registered backend
// are passed to the next handler untouched. For an unhealthy backend the
// middleware answers 503 with a JSON body describing the failure; otherwise it
// sets the X-Service-Status and X-Service-Response-Time headers and continues
// the chain.
func (hc *HealthChecker) Handler() fiber.Handler {
	return func(c fiber.Ctx) error {
		serviceName := hc.getServiceNameFromPath(c.Path())
		if serviceName == "" {
			return c.Next()
		}

		// Copy the record while the read lock is held. checkService mutates
		// the same struct under the write lock, so reading its fields after
		// RUnlock would race with a concurrent probe.
		var snapshot ServiceHealth
		hc.mutex.RLock()
		service, exists := hc.services[serviceName]
		if exists {
			snapshot = *service
		}
		hc.mutex.RUnlock()

		if !exists {
			return c.Next()
		}

		if snapshot.Status == ServiceStatusUnhealthy {
			return c.Status(fiber.StatusServiceUnavailable).JSON(fiber.Map{
				"error":      "Service temporarily unavailable",
				"service":    serviceName,
				"status":     snapshot.Status,
				"last_error": snapshot.LastError,
			})
		}

		c.Set("X-Service-Status", string(snapshot.Status))
		c.Set("X-Service-Response-Time", snapshot.ResponseTime.String())
		return c.Next()
	}
}
// startHealthChecks runs checkAllServices once per checkInterval. It never
// returns and is meant to be run in its own goroutine.
//
// A non-positive checkInterval (for example an unset configuration value) is
// replaced by a 30 second default, because time.NewTicker panics on a
// non-positive duration and that panic would take the whole process down.
func (hc *HealthChecker) startHealthChecks() {
	const fallbackInterval = 30 * time.Second

	interval := hc.checkInterval
	if interval <= 0 {
		interval = fallbackInterval
	}

	ticker := time.NewTicker(interval)
	defer ticker.Stop()

	for range ticker.C {
		hc.checkAllServices()
	}
}
// checkAllServices probes every registered service concurrently, waits for
// all probes to finish, and then records the completion time in lastCheck.
func (hc *HealthChecker) checkAllServices() {
	// Take a stable list of records so the probes run without the read lock.
	hc.mutex.RLock()
	targets := make([]*ServiceHealth, 0, len(hc.services))
	for _, svc := range hc.services {
		targets = append(targets, svc)
	}
	hc.mutex.RUnlock()

	var wg sync.WaitGroup
	wg.Add(len(targets))
	for _, svc := range targets {
		go func(s *ServiceHealth) {
			defer wg.Done()
			hc.checkService(s)
		}(svc)
	}
	wg.Wait()

	// lastCheck is read under the read lock by GetHealthStatus, so the write
	// has to happen under the write lock to avoid a data race.
	hc.mutex.Lock()
	hc.lastCheck = time.Now()
	hc.mutex.Unlock()
}
// checkService probes a single service with GET <URL>/health and records the
// outcome on the given record.
//
// A response with status 200 marks the service healthy and clears LastError.
// A transport error or any other status marks it unhealthy, stores a
// description in LastError, increments FailCount and, if no outage is being
// tracked yet, sets DownSince. LastCheck, ResponseTime and CheckCount are
// updated on every probe.
func (hc *HealthChecker) checkService(service *ServiceHealth) {
	// Drop trailing slashes so a base URL such as "http://svc/" probes
	// "/health" instead of "//health".
	base := service.URL
	for len(base) > 0 && base[len(base)-1] == '/' {
		base = base[:len(base)-1]
	}

	start := time.Now()
	resp, err := hc.client.Get(base + "/health")
	responseTime := time.Since(start)

	// Reduce the response to a verdict and release it before taking the
	// write lock, so no network resource is handled while readers are blocked.
	healthy := false
	lastError := ""
	if err != nil {
		lastError = err.Error()
	} else {
		healthy = resp.StatusCode == http.StatusOK
		if !healthy {
			lastError = fmt.Sprintf("HTTP %d", resp.StatusCode)
		}
		// The body is never read, so a Close error carries no information
		// relevant to the health verdict.
		_ = resp.Body.Close()
	}

	hc.mutex.Lock()
	defer hc.mutex.Unlock()

	service.LastCheck = time.Now()
	service.ResponseTime = responseTime
	service.CheckCount++

	if healthy {
		wasDown := service.Status == ServiceStatusUnhealthy
		service.Status = ServiceStatusHealthy
		service.LastError = ""
		if wasDown && !service.DownSince.IsZero() {
			// NOTE(review): this adds the length of the outage that just
			// ended to Uptime; confirm whether Uptime is meant to hold
			// accumulated downtime.
			service.Uptime += time.Since(service.DownSince)
			service.DownSince = time.Time{}
		}
		return
	}

	service.Status = ServiceStatusUnhealthy
	service.LastError = lastError
	service.FailCount++
	if service.DownSince.IsZero() {
		service.DownSince = time.Now()
	}
}
// getServiceNameFromPath maps a request path of the form
// /api/v1/<segment>/... to the registry key of the backend that serves it.
//
// It returns "" when the path does not start with "/api/v1/", when nothing
// follows the prefix, or when the first segment is not a known route.
func (hc *HealthChecker) getServiceNameFromPath(path string) string {
	const apiPrefix = "/api/v1/"
	if len(path) < len(apiPrefix) || path[:len(apiPrefix)] != apiPrefix {
		return ""
	}
	rest := path[len(apiPrefix):]

	// Only the first non-empty segment matters. Locate it by index instead of
	// splitting the whole path, so the lookup allocates nothing per request.
	// Extra slashes directly after the prefix are skipped.
	start := 0
	for start < len(rest) && rest[start] == '/' {
		start++
	}
	end := start
	for end < len(rest) && rest[end] != '/' {
		end++
	}

	switch rest[start:end] {
	case "game":
		return "game-service"
	case "questions":
		return "question-service"
	case "users":
		return "user-service"
	case "leaderboard":
		return "leaderboard-service"
	case "sessions":
		return "session-service"
	case "admin":
		return "admin-service"
	default:
		return ""
	}
}
// GetHealthStatus returns a report of every tracked service.
//
// The result has three keys: "overall_status" ("healthy" when every service is
// healthy, "degraded" otherwise), "services" (one map per service, keyed by
// registry name) and "summary" (service counts plus the time of the last
// sweep). The whole report is built under the read lock.
func (hc *HealthChecker) GetHealthStatus() map[string]interface{} {
	hc.mutex.RLock()
	defer hc.mutex.RUnlock()

	total := len(hc.services)
	healthy := 0
	perService := make(map[string]interface{}, total)

	for name, svc := range hc.services {
		entry := map[string]interface{}{
			"status":        svc.Status,
			"url":           svc.URL,
			"last_check":    svc.LastCheck,
			"response_time": svc.ResponseTime.String(),
			"check_count":   svc.CheckCount,
			"fail_count":    svc.FailCount,
		}

		if svc.Status == ServiceStatusHealthy {
			healthy++
			if svc.Uptime > 0 {
				entry["uptime"] = svc.Uptime.String()
			}
		} else {
			// Anything other than healthy, including unknown, is reported
			// with its last error and outage timing.
			entry["last_error"] = svc.LastError
			if !svc.DownSince.IsZero() {
				entry["down_since"] = svc.DownSince
				entry["down_duration"] = time.Since(svc.DownSince).String()
			}
		}

		// The success rate is omitted until at least one probe has run.
		if svc.CheckCount > 0 {
			succeeded := svc.CheckCount - svc.FailCount
			entry["success_rate"] = fmt.Sprintf("%.2f%%",
				float64(succeeded)/float64(svc.CheckCount)*100)
		}

		perService[name] = entry
	}

	overall := "degraded"
	if healthy == total {
		overall = "healthy"
	}

	return map[string]interface{}{
		"overall_status": overall,
		"services":       perService,
		"summary": map[string]interface{}{
			"total_services":     total,
			"healthy_services":   healthy,
			"unhealthy_services": total - healthy,
			"last_check":         hc.lastCheck,
		},
	}
}
// IsServiceHealthy reports whether the service registered under serviceName
// currently has status ServiceStatusHealthy. Names that are not registered
// yield false.
func (hc *HealthChecker) IsServiceHealthy(serviceName string) bool {
	hc.mutex.RLock()
	defer hc.mutex.RUnlock()

	service, known := hc.services[serviceName]
	return known && service.Status == ServiceStatusHealthy
}
// ForceHealthCheck triggers a sweep of every registered service outside the
// regular schedule. It returns immediately; the sweep runs in its own
// goroutine.
func (hc *HealthChecker) ForceHealthCheck() {
	go func() {
		hc.checkAllServices()
	}()
}