You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
40 KiB
40 KiB
Know Foolery - Detailed Observability Implementation Guidelines
Metrics Strategy
Application Metrics Collection
1. Business Metrics (Game-Specific)
// Business metrics for game insights
package metrics
import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)
// Business metric collectors, declared once at package scope and updated by
// the GameMetrics methods. They are built through promauto, so no explicit
// registration call appears anywhere in this package.
var (
	// Game session metrics

	// gamesStarted counts started games per player type and platform.
	gamesStarted = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Name: "games_started_total",
			Help: "Total number of games started",
		},
		[]string{"player_type", "platform"},
	)
	// gamesCompleted counts finished games per completion type and platform.
	gamesCompleted = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Name: "games_completed_total",
			Help: "Total number of games completed",
		},
		[]string{"completion_type", "platform"}, // normal, timeout, abandoned
	)
	// sessionDuration observes session length in seconds per completion type.
	sessionDuration = promauto.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "game_session_duration_seconds",
			Help:    "Duration of game sessions",
			Buckets: []float64{60, 300, 600, 900, 1200, 1500, 1800}, // 1min to 30min
		},
		[]string{"completion_type"},
	)
	// Question and answer metrics

	// questionsAsked counts questions per theme and difficulty.
	questionsAsked = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Name: "questions_asked_total",
			Help: "Total number of questions asked",
		},
		[]string{"theme", "difficulty"},
	)
	// answersSubmitted counts answers; the boolean and integer dimensions are
	// passed as strings by RecordAnswerSubmitted.
	answersSubmitted = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Name: "answers_submitted_total",
			Help: "Total number of answers submitted",
		},
		[]string{"theme", "is_correct", "attempt_number", "used_hint"},
	)
	// hintsRequested counts hint requests per theme and question difficulty.
	// No method in the visible code updates it.
	hintsRequested = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Name: "hints_requested_total",
			Help: "Total number of hints requested",
		},
		[]string{"theme", "question_difficulty"},
	)
	// Score distribution

	// scoreDistribution observes final scores, split by the coarse duration
	// label produced by getDurationBucket.
	scoreDistribution = promauto.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "game_scores",
			Help:    "Distribution of game scores",
			Buckets: []float64{0, 5, 10, 15, 20, 25, 30, 40, 50, 60, 80, 100},
		},
		[]string{"session_duration_bucket"},
	)
	// Leaderboard metrics

	// leaderboardUpdates is an unlabelled counter of leaderboard updates.
	leaderboardUpdates = promauto.NewCounter(
		prometheus.CounterOpts{
			Name: "leaderboard_updates_total",
			Help: "Total number of leaderboard updates",
		},
	)
	// topScoreChanges counts changes among the top scores per position band.
	topScoreChanges = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Name: "top_score_changes_total",
			Help: "Changes in top 10 scores",
		},
		[]string{"position"}, // top_1, top_5, top_10
	)
)
// Business metrics collection service

// GameMetrics exposes the Record* methods that update the package-level
// business collectors. The registry field is set by NewGameMetrics but is not
// read by any method visible in this file.
type GameMetrics struct {
	registry prometheus.Registerer
}
// NewGameMetrics returns a GameMetrics whose registry is the default
// Prometheus registerer.
func NewGameMetrics() *GameMetrics {
	gm := new(GameMetrics)
	gm.registry = prometheus.DefaultRegisterer
	return gm
}
// RecordGameStart adds one to games_started_total for the given player type
// and platform.
func (m *GameMetrics) RecordGameStart(playerType, platform string) {
	started := gamesStarted.WithLabelValues(playerType, platform)
	started.Inc()
}
// RecordGameCompletion adds one to games_completed_total and observes the
// session length, in seconds, on game_session_duration_seconds.
func (m *GameMetrics) RecordGameCompletion(completionType, platform string, duration time.Duration) {
	completed := gamesCompleted.WithLabelValues(completionType, platform)
	completed.Inc()

	elapsedSeconds := duration.Seconds()
	sessionDuration.WithLabelValues(completionType).Observe(elapsedSeconds)
}
// RecordQuestionAsked adds one to questions_asked_total for the given theme
// and difficulty.
func (m *GameMetrics) RecordQuestionAsked(theme, difficulty string) {
	asked := questionsAsked.WithLabelValues(theme, difficulty)
	asked.Inc()
}
// RecordAnswerSubmitted adds one to answers_submitted_total. The boolean and
// integer arguments are rendered as strings because label values are strings.
func (m *GameMetrics) RecordAnswerSubmitted(theme string, isCorrect bool, attemptNum int, usedHint bool) {
	// Order matches the label names declared on answersSubmitted.
	labelValues := []string{
		theme,
		strconv.FormatBool(isCorrect),
		strconv.Itoa(attemptNum),
		strconv.FormatBool(usedHint),
	}
	answersSubmitted.WithLabelValues(labelValues...).Inc()
}
// RecordFinalScore observes score on the game_scores histogram, labelled with
// the duration bucket that getDurationBucket assigns to sessionDuration.
func (m *GameMetrics) RecordFinalScore(score int, sessionDuration time.Duration) {
	observer := scoreDistribution.WithLabelValues(m.getDurationBucket(sessionDuration))
	observer.Observe(float64(score))
}
// getDurationBucket maps a session duration to the label value used for the
// session_duration_bucket label of the game_scores histogram.
//
// Each boundary is an inclusive upper bound compared against the exact
// duration. Comparing truncated whole minutes would place 5m30s in "0-5min"
// and 25m59s in "15-25min", one bucket too low.
//
// Any duration above 25 minutes, including sessions longer than 30 minutes,
// is reported as "25-30min"; the label text is kept as-is so existing series
// and dashboards remain valid.
func (m *GameMetrics) getDurationBucket(duration time.Duration) string {
	switch {
	case duration <= 5*time.Minute:
		return "0-5min"
	case duration <= 15*time.Minute:
		return "5-15min"
	case duration <= 25*time.Minute:
		return "15-25min"
	default:
		return "25-30min"
	}
}
2. Technical Metrics (Infrastructure)
// Technical metrics for system health
//
// Infrastructure collectors shared by the middleware and hooks in this
// package. Every vector carries a "service" label, or for the authentication
// metrics a method/result pair, so several services can report into the same
// metric names.
var (
	// HTTP metrics

	// httpRequestsTotal is incremented once per request by PrometheusMiddleware.
	httpRequestsTotal = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Name: "http_requests_total",
			Help: "Total number of HTTP requests",
		},
		[]string{"method", "endpoint", "status_code", "service"},
	)
	// httpRequestDuration observes request latency in seconds using the
	// client library's default buckets.
	httpRequestDuration = promauto.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "http_request_duration_seconds",
			Help:    "HTTP request duration",
			Buckets: prometheus.DefBuckets,
		},
		[]string{"method", "endpoint", "service"},
	)
	// Database metrics

	// dbConnectionsActive is a gauge; nothing in the visible code sets it.
	dbConnectionsActive = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "db_connections_active",
			Help: "Number of active database connections",
		},
		[]string{"database", "service"},
	)
	// dbQueryDuration observes mutation latency in seconds (1ms to 5s buckets);
	// it is fed by MetricsHook.
	dbQueryDuration = promauto.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "db_query_duration_seconds",
			Help:    "Database query duration",
			Buckets: []float64{0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 2.0, 5.0},
		},
		[]string{"query_type", "table", "service"},
	)
	// dbErrors counts failed database operations; MetricsHook always uses the
	// error_type value "query_error".
	dbErrors = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Name: "db_errors_total",
			Help: "Total number of database errors",
		},
		[]string{"error_type", "service"},
	)
	// Cache metrics

	// cacheOperations counts cache calls by operation and outcome.
	cacheOperations = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Name: "cache_operations_total",
			Help: "Total number of cache operations",
		},
		[]string{"operation", "result", "service"}, // get/set/delete, hit/miss/error
	)
	// cacheKeyCount is a gauge of keys currently held.
	// NOTE(review): the exported name ends in "_total", a suffix Prometheus
	// naming guidance reserves for counters; confirm nothing depends on the
	// name before renaming.
	cacheKeyCount = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "cache_keys_total",
			Help: "Number of keys in cache",
		},
		[]string{"cache_type", "service"},
	)
	// Authentication metrics

	// authenticationAttempts counts login attempts by method, outcome and user type.
	authenticationAttempts = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Name: "authentication_attempts_total",
			Help: "Total authentication attempts",
		},
		[]string{"method", "result", "user_type"}, // jwt/oauth, success/failure, player/admin
	)
	// tokenOperations counts token validations and refreshes by outcome.
	tokenOperations = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Name: "token_operations_total",
			Help: "JWT token operations",
		},
		[]string{"operation", "result"}, // validate/refresh, success/failure
	)
)
// Fiber middleware for HTTP metrics

// PrometheusMiddleware returns a Fiber handler that, for every request,
// increments http_requests_total and observes the elapsed time on
// http_request_duration_seconds. serviceName is used as the "service" label.
//
// The endpoint label is the matched route's path pattern, read after the
// downstream handlers have run. The error returned by the chain is passed
// through unchanged.
func PrometheusMiddleware(serviceName string) fiber.Handler {
	return func(c *fiber.Ctx) error {
		start := time.Now()

		// Process request
		err := c.Next()

		// Record metrics
		duration := time.Since(start).Seconds()
		method := c.Method()
		endpoint := c.Route().Path

		// When a handler returns an error, Fiber's error handler has not run
		// yet, so the response still holds its default status and failures
		// would be counted as successes. Derive the status from the error
		// instead: a *fiber.Error carries its own code, anything else is
		// treated as 500.
		status := c.Response().StatusCode()
		if err != nil {
			status = fiber.StatusInternalServerError
			if fiberErr, ok := err.(*fiber.Error); ok {
				status = fiberErr.Code
			}
		}

		httpRequestsTotal.WithLabelValues(
			method,
			endpoint,
			strconv.Itoa(status),
			serviceName,
		).Inc()
		httpRequestDuration.WithLabelValues(
			method,
			endpoint,
			serviceName,
		).Observe(duration)

		return err
	}
}
// Database metrics middleware for Ent

// MetricsHook holds the service name that Hook attaches as the "service"
// label on the database metrics it records.
type MetricsHook struct {
	serviceName string
}
// NewMetricsHook returns a MetricsHook that labels its samples with serviceName.
func NewMetricsHook(serviceName string) *MetricsHook {
	h := new(MetricsHook)
	h.serviceName = serviceName
	return h
}
// Hook returns an Ent hook that times create, update and delete mutations.
// Each mutation's duration is observed on db_query_duration_seconds, labelled
// with the lower-cased operation name, the mutation type and the service. A
// failed mutation also increments db_errors_total with error_type
// "query_error". The mutation's result and error are returned untouched.
func (h *MetricsHook) Hook() ent.Hook {
	observedOps := ent.OpCreate | ent.OpUpdate | ent.OpUpdateOne | ent.OpDelete | ent.OpDeleteOne

	instrument := func(next ent.Mutator) ent.Mutator {
		timed := func(ctx context.Context, m ent.Mutation) (ent.Value, error) {
			begin := time.Now()
			value, mutateErr := next.Mutate(ctx, m)
			elapsed := time.Since(begin).Seconds()

			operation := strings.ToLower(m.Op().String())
			entity := m.Type()
			dbQueryDuration.WithLabelValues(operation, entity, h.serviceName).Observe(elapsed)

			if mutateErr != nil {
				dbErrors.WithLabelValues("query_error", h.serviceName).Inc()
			}
			return value, mutateErr
		}
		return ent.MutateFunc(timed)
	}

	return hook.On(instrument, observedOps)
}
Frontend Metrics Collection
Web Application Metrics
// Frontend metrics collection

/**
 * Buffers metric events in memory and POSTs them to `endpoint` as
 * `{ metrics: [...] }`, either when `batchSize` events are queued or every
 * `flushInterval` milliseconds.
 *
 * Constructing an instance has page-wide side effects: it starts the flush
 * timer, registers an unload listener, installs PerformanceObserver hooks and
 * wraps `window.fetch` to time requests. Create it once per page.
 */
class MetricsCollector {
  private endpoint: string
  private batchSize: number = 50
  private flushInterval: number = 30000 // 30 seconds
  // Cap on queued events so an unreachable endpoint cannot grow the queue forever.
  private maxQueuedMetrics: number = 1000
  private metrics: MetricEvent[] = []
  // True while an upload is in flight, to avoid overlapping flushes.
  private flushing: boolean = false
  // The fetch implementation as it was before interceptFetch() replaced
  // window.fetch. Uploads use this so that sending metrics does not itself
  // generate new metrics (which would keep the queue permanently non-empty).
  private nativeFetch: typeof window.fetch = window.fetch.bind(window)

  constructor(endpoint: string) {
    this.endpoint = endpoint
    this.startBatchFlush()
    this.setupPerformanceObserver()
  }

  // User interaction metrics
  trackUserAction(action: string, properties: Record<string, any> = {}): void {
    this.addMetric({
      type: 'user_action',
      action,
      timestamp: Date.now(),
      session_id: this.getSessionId(),
      user_agent: navigator.userAgent,
      ...properties,
    })
  }

  // Game-specific metrics
  trackGameEvent(event: GameEvent): void {
    this.addMetric({
      type: 'game_event',
      event: event.type,
      timestamp: Date.now(),
      session_id: this.getSessionId(),
      game_session_id: event.gameSessionId,
      properties: event.properties,
    })
  }

  // Performance metrics
  // NOTE(review): only `name` and `value` are forwarded; the `url` and
  // `status` that interceptFetch() passes are not included in the event.
  trackPerformance(metric: PerformanceMetric): void {
    this.addMetric({
      type: 'performance',
      metric: metric.name,
      value: metric.value,
      timestamp: Date.now(),
      url: window.location.pathname,
    })
  }

  // Error tracking
  trackError(error: Error, context: string): void {
    this.addMetric({
      type: 'error',
      error_message: error.message,
      error_stack: error.stack,
      context,
      timestamp: Date.now(),
      url: window.location.pathname,
      user_agent: navigator.userAgent,
    })
  }

  private setupPerformanceObserver(): void {
    // Web Vitals tracking
    if ('PerformanceObserver' in window) {
      // Largest Contentful Paint
      this.observeEntries('largest-contentful-paint', (entries) => {
        entries.forEach((entry) => {
          this.trackPerformance({
            name: 'largest_contentful_paint',
            value: entry.startTime,
          })
        })
      })

      // First Input Delay
      this.observeEntries('first-input', (entries) => {
        entries.forEach((entry) => {
          const timing = entry as PerformanceEventTiming
          this.trackPerformance({
            name: 'first_input_delay',
            value: timing.processingStart - timing.startTime,
          })
        })
      })

      // Cumulative Layout Shift. The running total lives outside the
      // callback: the observer fires once per batch of entries, so a total
      // reset inside the callback would only ever report the latest batch.
      let cumulativeScore = 0
      this.observeEntries('layout-shift', (entries) => {
        entries.forEach((entry) => {
          const shift = entry as any // layout-shift fields are not in the base PerformanceEntry type
          if (!shift.hadRecentInput) {
            cumulativeScore += shift.value
          }
        })
        this.trackPerformance({
          name: 'cumulative_layout_shift',
          value: cumulativeScore,
        })
      })
    }

    // API response time tracking
    this.interceptFetch()
  }

  // Registers a PerformanceObserver for one entry type. Some browsers throw
  // for entry types they do not implement; that must not abort construction.
  private observeEntries(entryType: string, handle: (entries: PerformanceEntry[]) => void): void {
    try {
      new PerformanceObserver((list) => handle(list.getEntries())).observe({ entryTypes: [entryType] })
    } catch (error) {
      // Entry type not supported here: skip this metric.
    }
  }

  private interceptFetch(): void {
    const originalFetch = this.nativeFetch
    window.fetch = async (...args) => {
      const start = performance.now()
      // A Request object stringifies to "[object Request]", so read its url.
      const input = args[0]
      const url = input instanceof Request ? input.url : input.toString()
      try {
        const response = await originalFetch(...args)
        const duration = performance.now() - start
        this.trackPerformance({
          name: 'api_request_duration',
          value: duration,
          url,
          status: response.status,
        })
        return response
      } catch (error) {
        const duration = performance.now() - start
        this.trackError(error as Error, `API request to ${url}`)
        this.trackPerformance({
          name: 'api_request_duration',
          value: duration,
          url,
          status: 0,
        })
        throw error
      }
    }
  }

  private addMetric(metric: MetricEvent): void {
    this.metrics.push(metric)
    if (this.metrics.length >= this.batchSize) {
      this.flush()
    }
  }

  private async flush(): Promise<void> {
    if (this.flushing || this.metrics.length === 0) return
    this.flushing = true
    const batch = this.metrics
    this.metrics = []
    try {
      const response = await this.nativeFetch(this.endpoint, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({ metrics: batch }),
      })
      // fetch only rejects on network failure; a 5xx means the batch was not stored.
      if (response.status >= 500) {
        throw new Error(`Metrics endpoint responded with ${response.status}`)
      }
    } catch (error) {
      console.error('Failed to send metrics:', error)
      // Re-queue metrics for retry, oldest first, dropping the oldest beyond the cap.
      this.metrics = [...batch, ...this.metrics].slice(-this.maxQueuedMetrics)
    } finally {
      this.flushing = false
    }
  }

  private startBatchFlush(): void {
    setInterval(() => {
      this.flush()
    }, this.flushInterval)

    // Flush on page unload. An ordinary fetch may be cancelled while the page
    // is being torn down; sendBeacon hands the payload to the browser instead.
    window.addEventListener('beforeunload', () => {
      if (this.metrics.length === 0) return
      const payload = new Blob([JSON.stringify({ metrics: this.metrics })], {
        type: 'application/json',
      })
      if (navigator.sendBeacon(this.endpoint, payload)) {
        this.metrics = []
      }
    })
  }

  private getSessionId(): string {
    // Implementation depends on session management
    return sessionStorage.getItem('session_id') || 'anonymous'
  }
}
// Game-specific metrics tracking

/**
 * Typed helpers for the game's own events. Every method builds a game event
 * and hands it to the wrapped MetricsCollector via `trackGameEvent`.
 */
export class GameMetricsTracker {
  private collector: MetricsCollector

  constructor(collector: MetricsCollector) {
    this.collector = collector
  }

  /** Emits `game_started` with the player's name and detected platform. */
  trackGameStart(gameSessionId: string, playerName: string): void {
    this.emit('game_started', gameSessionId, {
      player_name: playerName,
      platform: this.getPlatform(),
    })
  }

  /** Emits `question_displayed`, stamped with the current time. */
  trackQuestionDisplayed(gameSessionId: string, questionId: string, theme: string): void {
    this.emit('question_displayed', gameSessionId, {
      question_id: questionId,
      theme,
      display_time: Date.now(),
    })
  }

  /** Emits `answer_submitted`; `timeTaken` is sent as `time_taken_ms`. */
  trackAnswerSubmitted(
    gameSessionId: string,
    questionId: string,
    isCorrect: boolean,
    attemptNumber: number,
    timeTaken: number,
    usedHint: boolean
  ): void {
    this.emit('answer_submitted', gameSessionId, {
      question_id: questionId,
      is_correct: isCorrect,
      attempt_number: attemptNumber,
      time_taken_ms: timeTaken,
      used_hint: usedHint,
    })
  }

  /** Emits `hint_requested`, stamped with the current time. */
  trackHintRequested(gameSessionId: string, questionId: string): void {
    this.emit('hint_requested', gameSessionId, {
      question_id: questionId,
      request_time: Date.now(),
    })
  }

  /** Emits `game_completed` with the final score, answer count, completion type and platform. */
  trackGameCompleted(
    gameSessionId: string,
    finalScore: number,
    questionsAnswered: number,
    completionType: 'normal' | 'timeout' | 'abandoned'
  ): void {
    this.emit('game_completed', gameSessionId, {
      final_score: finalScore,
      questions_answered: questionsAnswered,
      completion_type: completionType,
      platform: this.getPlatform(),
    })
  }

  // Single place where events are handed to the collector.
  private emit(
    type: GameEvent['type'],
    gameSessionId: string,
    properties: GameEvent['properties']
  ): void {
    this.collector.trackGameEvent({ type, gameSessionId, properties })
  }

  private getPlatform(): string {
    // Detect platform; mobile user agents are checked before the Wails global.
    const userAgent = navigator.userAgent
    if (/Android/i.test(userAgent)) {
      return 'android'
    }
    if (/iPhone|iPad|iPod/i.test(userAgent)) {
      return 'ios'
    }
    return window.wails ? 'desktop' : 'web' // `wails` is present in Wails apps
  }
}
// Usage in React components

// Builds the collector, the tracker and the bound tracking functions.
const createGameMetricsApi = () => {
  const collector = new MetricsCollector('/api/v1/metrics')
  const gameTracker = new GameMetricsTracker(collector)
  return {
    trackGameStart: gameTracker.trackGameStart.bind(gameTracker),
    trackQuestionDisplayed: gameTracker.trackQuestionDisplayed.bind(gameTracker),
    trackAnswerSubmitted: gameTracker.trackAnswerSubmitted.bind(gameTracker),
    trackHintRequested: gameTracker.trackHintRequested.bind(gameTracker),
    trackGameCompleted: gameTracker.trackGameCompleted.bind(gameTracker),
    trackUserAction: collector.trackUserAction.bind(collector),
    trackError: collector.trackError.bind(collector),
  }
}

// Page-wide instance, created on first use rather than at import time so the
// module can be loaded where `window` is not available.
let sharedGameMetricsApi: ReturnType<typeof createGameMetricsApi> | null = null

/**
 * Returns the tracking functions for game metrics.
 *
 * A MetricsCollector starts a timer, adds an unload listener and wraps
 * `window.fetch` when constructed. Writing `useRef(new MetricsCollector(...))`
 * evaluates the constructor on every render of every component, stacking those
 * side effects, so a single shared instance is used instead. The returned
 * object and its functions keep the same identity across renders.
 */
export const useGameMetrics = () => {
  if (sharedGameMetricsApi === null) {
    sharedGameMetricsApi = createGameMetricsApi()
  }
  return sharedGameMetricsApi
}
Distributed Tracing
OpenTelemetry Integration
Backend Tracing Setup
// OpenTelemetry tracing setup
package observability
import (
"context"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/exporters/jaeger"
"go.opentelemetry.io/otel/sdk/resource"
"go.opentelemetry.io/otel/sdk/trace"
"go.opentelemetry.io/otel/semconv/v1.12.0/httpconv"
"go.opentelemetry.io/otel/semconv/v1.12.0/netconv"
)
// TracingConfig carries the settings that InitTracing reads.
type TracingConfig struct {
	ServiceName    string  // recorded as the "service.name" resource attribute
	ServiceVersion string  // recorded as the "service.version" resource attribute
	Environment    string  // recorded as the "environment" resource attribute
	JaegerEndpoint string  // collector endpoint handed to the Jaeger exporter
	SampleRate     float64 // ratio passed to the trace-ID ratio sampler
}
// InitTracing builds a tracer provider that exports spans to the Jaeger
// collector at config.JaegerEndpoint, installs it as the global provider and
// returns it. On failure it returns a nil provider and the underlying error,
// and the global provider is left untouched.
//
// Root spans are sampled at config.SampleRate; child spans follow their
// parent's sampling decision so a trace is never recorded in part.
func InitTracing(config TracingConfig) (*trace.TracerProvider, error) {
	// Create resource with service information. This is done before the
	// exporter is constructed so that a failure here does not leave an
	// exporter behind that nothing will ever shut down.
	res, err := resource.New(
		context.Background(),
		resource.WithAttributes(
			attribute.String("service.name", config.ServiceName),
			attribute.String("service.version", config.ServiceVersion),
			attribute.String("environment", config.Environment),
		),
	)
	if err != nil {
		return nil, err
	}

	// Create Jaeger exporter
	jaegerExporter, err := jaeger.New(
		jaeger.WithCollectorEndpoint(jaeger.WithEndpoint(config.JaegerEndpoint)),
	)
	if err != nil {
		return nil, err
	}

	// Create tracer provider
	tp := trace.NewTracerProvider(
		trace.WithBatcher(jaegerExporter),
		trace.WithResource(res),
		trace.WithSampler(trace.ParentBased(trace.TraceIDRatioBased(config.SampleRate))),
	)

	// Set global tracer provider
	otel.SetTracerProvider(tp)
	return tp, nil
}
// Fiber middleware for distributed tracing

// TracingMiddleware returns a Fiber handler that wraps each request in a span
// obtained from the tracer named serviceName. The span's context is stored
// with c.SetUserContext so downstream code can create child spans. The error
// returned by the chain is passed through unchanged.
func TracingMiddleware(serviceName string) fiber.Handler {
	tracer := otel.Tracer(serviceName)
	return func(c *fiber.Ctx) error {
		method := c.Method()

		// Start span. Only the method is known reliably at this point; the
		// route is filled in after the chain has run (see SetName below).
		ctx, span := tracer.Start(c.Context(), method)
		defer span.End()

		// Set span attributes
		span.SetAttributes(
			httpconv.HTTPMethodKey.String(method),
			httpconv.HTTPURLKey.String(string(c.Request().URI().FullURI())),
			httpconv.HTTPUserAgentKey.String(string(c.Request().Header.UserAgent())),
			netconv.NetPeerIPKey.String(c.IP()),
		)

		// Add to context
		c.SetUserContext(ctx)

		// Process request
		err := c.Next()

		// Inside a middleware, c.Route() refers to the middleware's own mount
		// point until the downstream handlers have run, so the route-based
		// span name is only correct once c.Next() has returned.
		span.SetName(fmt.Sprintf("%s %s", method, c.Route().Path))

		// When a handler returns an error, Fiber's error handler has not run
		// yet and the response still holds its default status. Take the code
		// from a *fiber.Error and treat any other error as 500.
		status := c.Response().StatusCode()
		if err != nil {
			status = fiber.StatusInternalServerError
			if fiberErr, ok := err.(*fiber.Error); ok {
				status = fiberErr.Code
			}
			span.RecordError(err)
			// Same failure marker that TraceServiceOperation sets.
			span.SetAttributes(attribute.Bool("error", true))
		}

		// Set response attributes
		span.SetAttributes(
			httpconv.HTTPStatusCodeKey.Int(status),
		)
		return err
	}
}
// Service-level tracing helpers

// TraceServiceOperation runs fn inside a span named operation, created from
// the tracer named serviceName, and passes fn the span's context. If fn fails,
// the error is recorded on the span, the span is tagged error=true, and the
// same error is returned.
func TraceServiceOperation(ctx context.Context, serviceName, operation string, fn func(context.Context) error) error {
	spanCtx, span := otel.Tracer(serviceName).Start(ctx, operation)
	defer span.End()

	opErr := fn(spanCtx)
	if opErr == nil {
		return nil
	}

	span.RecordError(opErr)
	span.SetAttributes(attribute.Bool("error", true))
	return opErr
}
// Database tracing for Ent

// TraceDatabaseOperation runs fn inside a span named "db.<operation>.<table>"
// from the "database" tracer and passes fn the span's context. The span
// carries db.operation, db.table and db.system attributes; db.system is
// always "postgresql". If fn fails, the error is recorded, the span is tagged
// error=true (the same marker TraceServiceOperation uses, so failed spans can
// be filtered uniformly), and the error is returned.
func TraceDatabaseOperation(ctx context.Context, operation, table string, fn func(context.Context) error) error {
	tracer := otel.Tracer("database")
	ctx, span := tracer.Start(ctx, fmt.Sprintf("db.%s.%s", operation, table))
	defer span.End()

	span.SetAttributes(
		attribute.String("db.operation", operation),
		attribute.String("db.table", table),
		attribute.String("db.system", "postgresql"),
	)

	err := fn(ctx)
	if err != nil {
		span.RecordError(err)
		span.SetAttributes(attribute.Bool("error", true))
	}
	return err
}
Frontend Tracing Integration
// Frontend tracing with OpenTelemetry
import { WebTracerProvider } from '@opentelemetry/sdk-trace-web'
import { getWebAutoInstrumentations } from '@opentelemetry/auto-instrumentations-web'
import { JaegerExporter } from '@opentelemetry/exporter-jaeger'
import { registerInstrumentations } from '@opentelemetry/instrumentation'
/**
 * Sets up browser tracing: creates a WebTracerProvider tagged with the
 * service name and version, attaches a batching Jaeger exporter, registers the
 * provider, and enables the document-load, user-interaction and fetch
 * auto-instrumentations.
 *
 * NOTE(review): confirm that the Jaeger exporter package used here supports
 * browser environments; if it does not, a different exporter is needed.
 */
export class FrontendTracing {
  private provider: WebTracerProvider

  constructor(config: TracingConfig) {
    this.provider = new WebTracerProvider({
      resource: new Resource({
        'service.name': config.serviceName,
        'service.version': config.serviceVersion,
      }),
    })

    // Configure Jaeger exporter
    const jaegerExporter = new JaegerExporter({
      endpoint: config.jaegerEndpoint,
    })
    this.provider.addSpanProcessor(
      new BatchSpanProcessor(jaegerExporter)
    )

    // Register provider
    this.provider.register()

    // Auto-instrument browser APIs
    registerInstrumentations({
      instrumentations: [
        getWebAutoInstrumentations({
          '@opentelemetry/instrumentation-document-load': {
            enabled: true,
          },
          '@opentelemetry/instrumentation-user-interaction': {
            enabled: true,
          },
          '@opentelemetry/instrumentation-fetch': {
            enabled: true,
            propagateTraceHeaderCorsUrls: [
              // The base URL is literal text, not a pattern. Unescaped, its
              // "." would match any character and a "?" or "+" would change
              // the pattern's meaning, so trace headers could be sent to
              // look-alike hosts or not sent to the real one.
              new RegExp(FrontendTracing.escapeRegExp(config.apiBaseUrl)),
            ],
          },
        }),
      ],
    })
  }

  // Escapes every regular-expression metacharacter in `text`.
  private static escapeRegExp(text: string): string {
    return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
  }

  // Game-specific tracing
  /**
   * Runs `fn` inside an active span named `action` with `properties` as span
   * attributes. If `fn` rejects, the exception is recorded, the span status is
   * set to ERROR and the error is re-thrown. The span is always ended.
   */
  traceGameAction(action: string, properties: Record<string, any>, fn: () => Promise<void>): Promise<void> {
    const tracer = trace.getTracer('game-frontend')
    return tracer.startActiveSpan(action, async (span) => {
      try {
        // Set span attributes
        span.setAttributes(properties)
        await fn()
      } catch (error) {
        span.recordException(error as Error)
        span.setStatus({ code: SpanStatusCode.ERROR })
        throw error
      } finally {
        span.end()
      }
    })
  }
}
// React hook for tracing

/**
 * Returns `traceUserAction`, which runs `fn` inside an active span named
 * `user.<action>` carrying `properties` as attributes. If `fn` rejects, the
 * exception is recorded, the span status is set to ERROR (as
 * `FrontendTracing.traceGameAction` does, so failed spans are marked the same
 * way everywhere) and the error is re-thrown. The span is always ended.
 */
export const useTracing = () => {
  const tracer = trace.getTracer('react-components')

  const traceUserAction = useCallback(
    async (action: string, properties: Record<string, any>, fn: () => Promise<void>) => {
      return tracer.startActiveSpan(`user.${action}`, async (span) => {
        try {
          span.setAttributes(properties)
          await fn()
        } catch (error) {
          span.recordException(error as Error)
          span.setStatus({ code: SpanStatusCode.ERROR })
          throw error
        } finally {
          span.end()
        }
      })
    },
    [tracer]
  )

  return { traceUserAction }
}
Logging Strategy
Structured Logging Implementation
Backend Logging
// Structured logging with zerolog
package logging
import (
"os"
"time"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
)
// Logger wraps a zerolog.Logger and exposes one method per kind of
// structured event the application emits.
type Logger struct {
	logger zerolog.Logger
}
// LogConfig carries the settings that NewLogger reads.
type LogConfig struct {
	Level       string // parsed with zerolog.ParseLevel; NewLogger falls back to info on a parse error
	Environment string // "development" selects console output; also attached to every entry
	ServiceName string // attached to every entry as "service"
	Version     string // attached to every entry as "version"
}
// NewLogger builds a Logger from config. In the "development" environment it
// writes human-readable console lines to stdout; otherwise it writes JSON.
// Every entry carries a timestamp plus the service, version and environment
// fields.
//
// Note that the level and the timestamp format are set through zerolog's
// package-level settings, so they apply to every zerolog logger in the
// process, not only the one returned here.
func NewLogger(config LogConfig) *Logger {
	// Parse log level. ParseLevel reports an empty string as NoLevel with a
	// nil error; using NoLevel as the global level would suppress every
	// levelled event, so an unset level gets the same info fallback as an
	// unparsable one.
	level, err := zerolog.ParseLevel(config.Level)
	if err != nil || level == zerolog.NoLevel {
		level = zerolog.InfoLevel
	}

	// Configure zerolog
	zerolog.SetGlobalLevel(level)
	zerolog.TimeFieldFormat = time.RFC3339Nano

	var logger zerolog.Logger
	if config.Environment == "development" {
		// Human-readable console output for development
		logger = zerolog.New(zerolog.ConsoleWriter{
			Out:        os.Stdout,
			TimeFormat: "15:04:05",
		}).With().Timestamp().Logger()
	} else {
		// JSON output for production
		logger = zerolog.New(os.Stdout).With().Timestamp().Logger()
	}

	// Add service metadata
	logger = logger.With().
		Str("service", config.ServiceName).
		Str("version", config.Version).
		Str("environment", config.Environment).
		Logger()

	return &Logger{logger: logger}
}
// Structured logging methods

// GameEvent writes an info-level entry with event_type "game", the event
// name, the game session and user identifiers, and the caller's properties.
func (l *Logger) GameEvent(event string, gameSessionID, userID string, properties map[string]interface{}) {
	entry := l.logger.Info().Str("event_type", "game")
	entry = entry.Str("event", event)
	entry = entry.Str("game_session_id", gameSessionID)
	entry = entry.Str("user_id", userID)
	entry = entry.Fields(properties)
	entry.Msg("Game event occurred")
}
// APIRequest writes an info-level entry with event_type "api_request"
// describing one handled request: method, path, status code, duration and
// user identifier.
func (l *Logger) APIRequest(method, path string, statusCode int, duration time.Duration, userID string) {
	entry := l.logger.Info().Str("event_type", "api_request")
	entry = entry.Str("method", method).Str("path", path)
	entry = entry.Int("status_code", statusCode)
	entry = entry.Dur("duration_ms", duration)
	entry = entry.Str("user_id", userID)
	entry.Msg("API request processed")
}
// DatabaseOperation writes a debug-level entry with event_type "database"
// describing one database call: operation, table, duration and rows affected.
func (l *Logger) DatabaseOperation(operation, table string, duration time.Duration, rowsAffected int64) {
	entry := l.logger.Debug().Str("event_type", "database")
	entry = entry.Str("operation", operation).Str("table", table)
	entry = entry.Dur("duration_ms", duration)
	entry = entry.Int64("rows_affected", rowsAffected)
	entry.Msg("Database operation completed")
}
// AuthenticationEvent writes an entry with event_type "authentication".
// Successful events are logged at info level and failures at warn level.
// Every key/value pair in details is added to the entry as a string field.
func (l *Logger) AuthenticationEvent(event, userID, userType string, success bool, details map[string]string) {
	// Create only the event that will be written, rather than creating an
	// info event and abandoning it on the failure path.
	var entry *zerolog.Event
	if success {
		entry = l.logger.Info()
	} else {
		entry = l.logger.Warn()
	}

	// zerolog's Fields only understands map[string]interface{} and
	// []interface{}; a map[string]string passed directly is not written, so
	// the details are copied into the accepted map type first.
	extra := make(map[string]interface{}, len(details))
	for key, value := range details {
		extra[key] = value
	}

	entry.
		Str("event_type", "authentication").
		Str("event", event).
		Str("user_id", userID).
		Str("user_type", userType).
		Bool("success", success).
		Fields(extra).
		Msg("Authentication event")
}
// SecurityEvent writes a warn-level entry with event_type "security". The
// severity argument is recorded as a field; the log level is warn regardless
// of its value.
func (l *Logger) SecurityEvent(event, userID, ipAddress string, severity string, details map[string]interface{}) {
	entry := l.logger.Warn().Str("event_type", "security")
	entry = entry.Str("event", event)
	entry = entry.Str("user_id", userID).Str("ip_address", ipAddress)
	entry = entry.Str("severity", severity)
	entry = entry.Fields(details)
	entry.Msg("Security event detected")
}
// Error writes an error-level entry carrying err, a free-form context string
// and any additional fields.
func (l *Logger) Error(err error, context string, fields map[string]interface{}) {
	entry := l.logger.Error().Err(err)
	entry = entry.Str("context", context)
	entry = entry.Fields(fields)
	entry.Msg("Error occurred")
}
// Fiber middleware for request logging

// RequestLoggingMiddleware returns a Fiber handler that logs one APIRequest
// entry per request after the downstream handlers have run. The user is read
// from the "user_id" local and defaults to "anonymous". The error returned by
// the chain is passed through unchanged.
func RequestLoggingMiddleware(logger *Logger) fiber.Handler {
	return func(c *fiber.Ctx) error {
		start := time.Now()

		// Process request
		err := c.Next()

		// Log request
		duration := time.Since(start)

		// The local is set by other code and is not guaranteed to be a
		// string; an unchecked assertion would panic inside the logging path.
		userID := "anonymous"
		switch id := c.Locals("user_id").(type) {
		case string:
			userID = id
		case interface{ String() string }:
			userID = id.String()
		}

		// When a handler returns an error, Fiber's error handler has not run
		// yet and the response still holds its default status. Take the code
		// from a *fiber.Error and treat any other error as 500.
		status := c.Response().StatusCode()
		if err != nil {
			status = fiber.StatusInternalServerError
			if fiberErr, ok := err.(*fiber.Error); ok {
				status = fiberErr.Code
			}
		}

		logger.APIRequest(
			c.Method(),
			c.Route().Path,
			status,
			duration,
			userID,
		)
		return err
	}
}
Frontend Logging
// Frontend structured logging

/** One buffered log record, as sent to the logging endpoint. */
interface LogEntry {
  // ISO-8601 string; FrontendLogger fills it from `new Date().toISOString()`.
  timestamp: string
  level: 'debug' | 'info' | 'warn' | 'error'
  message: string
  // Free-form origin tag; FrontendLogger's helpers use 'game', 'user' and 'performance'.
  context?: string
  // The three identifiers below are read from sessionStorage by FrontendLogger.
  userId?: string
  sessionId?: string
  gameSessionId?: string
  // Present only on entries created with an Error object.
  error?: {
    name: string
    message: string
    stack?: string
  }
  properties?: Record<string, any>
}
/**
 * Buffers structured log entries and POSTs them to `endpoint` as
 * `{ logs: [...] }`, either when `maxBufferSize` entries are queued or every
 * `flushInterval` milliseconds. On page unload the remaining entries are
 * handed to `navigator.sendBeacon`.
 */
export class FrontendLogger {
  private buffer: LogEntry[] = []
  private endpoint: string
  private maxBufferSize: number = 100
  private flushInterval: number = 30000

  constructor(endpoint: string) {
    this.endpoint = endpoint
    this.startPeriodicFlush()
  }

  debug(message: string, context?: string, properties?: Record<string, any>): void {
    this.log('debug', message, context, properties)
  }

  info(message: string, context?: string, properties?: Record<string, any>): void {
    this.log('info', message, context, properties)
  }

  warn(message: string, context?: string, properties?: Record<string, any>): void {
    this.log('warn', message, context, properties)
  }

  /** Logs at error level; when `error` is given, its name, message and stack are attached. */
  error(message: string, error?: Error, context?: string, properties?: Record<string, any>): void {
    this.log('error', message, context, properties, error)
  }

  // Game-specific logging methods
  logGameEvent(event: string, gameSessionId: string, properties?: Record<string, any>): void {
    this.info(`Game event: ${event}`, 'game', {
      gameSessionId,
      ...properties,
    })
  }

  logUserAction(action: string, properties?: Record<string, any>): void {
    this.info(`User action: ${action}`, 'user', properties)
  }

  logPerformanceMetric(metric: string, value: number, unit: string): void {
    this.debug(`Performance metric: ${metric}`, 'performance', {
      metric,
      value,
      unit,
      url: window.location.pathname,
    })
  }

  // Single place where entries are built, so every level gets the same
  // identifiers and the same flush check.
  private log(
    level: LogEntry['level'],
    message: string,
    context?: string,
    properties?: Record<string, any>,
    error?: Error
  ): void {
    const entry: LogEntry = {
      timestamp: new Date().toISOString(),
      level,
      message,
      context,
      userId: this.getUserId(),
      sessionId: this.getSessionId(),
      gameSessionId: this.getGameSessionId(),
      properties,
    }
    if (error) {
      entry.error = {
        name: error.name,
        message: error.message,
        stack: error.stack,
      }
    }
    this.buffer.push(entry)
    this.checkFlushConditions()
  }

  private checkFlushConditions(): void {
    if (this.buffer.length >= this.maxBufferSize) {
      this.flush()
    }
  }

  private async flush(): Promise<void> {
    if (this.buffer.length === 0) return

    const logs = [...this.buffer]
    this.buffer = []

    try {
      const response = await fetch(this.endpoint, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({ logs }),
      })
      // fetch only rejects on network failure; a 5xx means the batch was not stored.
      if (response.status >= 500) {
        throw new Error(`Log endpoint responded with ${response.status}`)
      }
    } catch (error) {
      console.error('Failed to send logs:', error)
      // Re-queue logs for retry (keep only most recent to avoid memory issues)
      this.buffer.unshift(...logs.slice(-50))
    }
  }

  private startPeriodicFlush(): void {
    setInterval(() => {
      this.flush()
    }, this.flushInterval)

    // Flush on page unload
    window.addEventListener('beforeunload', () => {
      // Use sendBeacon for reliable delivery during page unload
      if (this.buffer.length > 0) {
        const queued = navigator.sendBeacon(this.endpoint, JSON.stringify({ logs: this.buffer }))
        // The unload can still be cancelled; clearing the buffer once the
        // browser has accepted the payload stops the same entries being sent
        // again by a later flush.
        if (queued) {
          this.buffer = []
        }
      }
    })
  }

  private getUserId(): string | undefined {
    // Implementation depends on auth system
    return sessionStorage.getItem('user_id') || undefined
  }

  private getSessionId(): string | undefined {
    return sessionStorage.getItem('session_id') || undefined
  }

  private getGameSessionId(): string | undefined {
    return sessionStorage.getItem('game_session_id') || undefined
  }
}
Monitoring Dashboards
Grafana Dashboard Configuration
1. Business Intelligence Dashboard
{
"dashboard": {
"title": "Know Foolery - Business Intelligence",
"panels": [
{
"title": "Active Games",
"type": "stat",
"targets": [
{
"expr": "sum(rate(games_started_total[5m])) * 300",
"legendFormat": "Games per 5min"
}
]
},
{
"title": "Game Completion Rate",
"type": "stat",
"targets": [
{
"expr": "sum(rate(games_completed_total{completion_type=\"normal\"}[1h])) / sum(rate(games_started_total[1h])) * 100",
"legendFormat": "Completion %"
}
]
},
{
"title": "Average Session Duration",
"type": "graph",
"targets": [
{
"expr": "histogram_quantile(0.5, rate(game_session_duration_seconds_bucket[5m]))",
"legendFormat": "Median"
},
{
"expr": "histogram_quantile(0.95, rate(game_session_duration_seconds_bucket[5m]))",
"legendFormat": "95th percentile"
}
]
},
{
"title": "Question Accuracy by Theme",
"type": "heatmap",
"targets": [
{
"expr": "sum by (theme) (rate(answers_submitted_total{is_correct=\"true\"}[1h])) / sum by (theme) (rate(answers_submitted_total[1h]))",
"legendFormat": "{{theme}}"
}
]
},
{
"title": "Hint Usage Rate",
"type": "graph",
"targets": [
{
"expr": "sum(rate(hints_requested_total[5m])) / sum(rate(questions_asked_total[5m])) * 100",
"legendFormat": "Hint Usage %"
}
]
},
{
"title": "Score Distribution",
"type": "histogram",
"targets": [
{
"expr": "histogram_quantile(0.25, rate(game_scores_bucket[1h]))",
"legendFormat": "25th percentile"
},
{
"expr": "histogram_quantile(0.5, rate(game_scores_bucket[1h]))",
"legendFormat": "Median"
},
{
"expr": "histogram_quantile(0.75, rate(game_scores_bucket[1h]))",
"legendFormat": "75th percentile"
}
]
}
]
}
}
2. Technical Performance Dashboard
{
"dashboard": {
"title": "Know Foolery - Technical Performance",
"panels": [
{
"title": "API Response Times",
"type": "graph",
"targets": [
{
"expr": "histogram_quantile(0.95, sum by (le, service) (rate(http_request_duration_seconds_bucket[5m])))",
"legendFormat": "{{service}} - 95th percentile"
}
]
},
{
"title": "Error Rate",
"type": "graph",
"targets": [
{
"expr": "sum by (service) (rate(http_requests_total{status_code=~\"5..\"}[5m])) / sum by (service) (rate(http_requests_total[5m])) * 100",
"legendFormat": "{{service}} - Error %"
}
]
},
{
"title": "Database Performance",
"type": "graph",
"targets": [
{
"expr": "histogram_quantile(0.95, sum by (le, service) (rate(db_query_duration_seconds_bucket[5m])))",
"legendFormat": "{{service}} - Query Time"
}
]
},
{
"title": "Cache Hit Rate",
"type": "stat",
"targets": [
{
"expr": "sum(rate(cache_operations_total{operation=\"get\",result=\"hit\"}[5m])) / sum(rate(cache_operations_total{operation=\"get\"}[5m])) * 100",
"legendFormat": "Hit Rate %"
}
]
},
{
"title": "Authentication Success Rate",
"type": "graph",
"targets": [
{
"expr": "sum by (method) (rate(authentication_attempts_total{result=\"success\"}[5m])) / sum by (method) (rate(authentication_attempts_total[5m])) * 100",
"legendFormat": "{{method}} - Success %"
}
]
}
]
}
}
Alerting Strategy
Alert Rules Configuration
Critical Alerts
# prometheus-alerts.yml
groups:
- name: know-foolery-critical
rules:
- alert: HighErrorRate
expr: sum by (service) (rate(http_requests_total{status_code=~"5.."}[5m])) / sum by (service) (rate(http_requests_total[5m])) > 0.05
for: 2m
labels:
severity: critical
team: backend
annotations:
summary: "High error rate detected in {{ $labels.service }}"
description: "Error rate is {{ $value | humanizePercentage }} for service {{ $labels.service }}"
- alert: DatabaseConnectionFailure
expr: db_connections_active == 0
for: 1m
labels:
severity: critical
team: backend
annotations:
summary: "Database connections dropped to zero"
description: "Service {{ $labels.service }} has no active database connections"
- alert: AuthenticationSystemDown
expr: up{service="zitadel"} == 0
for: 1m
labels:
severity: critical
team: security
annotations:
summary: "Authentication system is down"
description: "Zitadel authentication service is unreachable"
- alert: GameSessionsStuck
expr: sum(increase(games_started_total[5m])) > sum(increase(games_completed_total[5m])) * 2
for: 5m
labels:
severity: critical
team: backend
annotations:
summary: "Game sessions not completing"
description: "Many games are starting but not completing normally"
- name: know-foolery-warning
rules:
- alert: HighLatency
expr: histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m])) > 1.0
for: 5m
labels:
severity: warning
team: backend
annotations:
summary: "High API latency detected"
description: "95th percentile latency is {{ $value }}s for {{ $labels.service }}"
- alert: LowGameCompletionRate
expr: sum(rate(games_completed_total{completion_type="normal"}[1h])) / sum(rate(games_started_total[1h])) < 0.7
for: 10m
labels:
severity: warning
team: product
annotations:
summary: "Low game completion rate"
description: "Only {{ $value | humanizePercentage }} of games are being completed normally"
- alert: HighHintUsage
expr: sum(rate(hints_requested_total[1h])) / sum(rate(questions_asked_total[1h])) > 0.8
for: 15m
labels:
severity: warning
team: product
annotations:
summary: "Unusually high hint usage"
description: "{{ $value | humanizePercentage }} of questions are requesting hints"
- name: know-foolery-security
rules:
- alert: HighAuthenticationFailures
expr: rate(authentication_attempts_total{result="failure"}[5m]) > 10
for: 2m
labels:
severity: warning
team: security
annotations:
summary: "High authentication failure rate"
description: "{{ $value }} authentication failures per second"
- alert: SuspiciousUserActivity
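# answers_submitted_total has no user_id label (its labels are theme, is_correct,
# attempt_number and used_hint), so this rule watches the overall submission rate.
# Per-user detection needs a different signal, such as request logs or a rate limiter.
expr: sum(rate(answers_submitted_total[1m])) > 5
for: 1m
labels:
severity: warning
team: security
annotations:
summary: "Suspicious user activity detected"
description: "Answers are being submitted at {{ $value }}/second across all users"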
Alert Routing and Escalation
# alertmanager.yml
global:
slack_api_url: 'https://hooks.slack.com/services/...'
route:
group_by: ['alertname', 'service']
group_wait: 10s
group_interval: 10s
repeat_interval: 1h
receiver: 'default'
routes:
- match:
severity: critical
receiver: 'critical-alerts'
group_wait: 0s
- match:
team: security
receiver: 'security-team'
- match:
team: product
receiver: 'product-team'
receivers:
- name: 'default'
slack_configs:
- channel: '#alerts'
title: 'Know Foolery Alert'
text: '{{ range .Alerts }}{{ .Annotations.summary }}{{ end }}'
- name: 'critical-alerts'
slack_configs:
- channel: '#critical-alerts'
title: 'CRITICAL: Know Foolery'
text: "{{ range .Alerts }}{{ .Annotations.summary }}\n{{ .Annotations.description }}{{ end }}"
pagerduty_configs:
- service_key: 'your-pagerduty-key'
description: '{{ range .Alerts }}{{ .Annotations.summary }}{{ end }}'
- name: 'security-team'
slack_configs:
- channel: '#security-alerts'
title: 'Security Alert: Know Foolery'
text: '{{ range .Alerts }}{{ .Annotations.summary }}{{ end }}'
- name: 'product-team'
slack_configs:
- channel: '#product-alerts'
title: 'Product Alert: Know Foolery'
text: '{{ range .Alerts }}{{ .Annotations.summary }}{{ end }}'
This comprehensive observability strategy ensures that Know Foolery has full visibility into its performance, user behavior, and system health, enabling proactive issue resolution and data-driven product improvements.