// Package tracing provides distributed tracing for the KnowFoolery application.
package tracing

import (
	"context"
	"fmt"
	"net/url"
	"strconv"

	"go.opentelemetry.io/otel"
	"go.opentelemetry.io/otel/attribute"
	"go.opentelemetry.io/otel/codes"
	"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp"
	"go.opentelemetry.io/otel/sdk/resource"
	sdktrace "go.opentelemetry.io/otel/sdk/trace"
	semconv "go.opentelemetry.io/otel/semconv/v1.26.0"
	oteltrace "go.opentelemetry.io/otel/trace"

	"knowfoolery/backend/shared/infra/utils/envutil"
)

// Config holds the configuration for the tracer.
type Config struct {
	// ServiceName is reported as the service.name resource attribute and is
	// also used as the name of the underlying OpenTelemetry tracer.
	ServiceName string
	// ServiceVersion is reported as the service.version resource attribute.
	ServiceVersion string
	// Environment is reported as the "deployment.environment" resource
	// attribute.
	Environment string
	// OTLPEndpoint is the absolute http:// or https:// URL that traces are
	// exported to.
	OTLPEndpoint string
	// JaegerEndpoint is a legacy alias kept for backward compatibility.
	// NewTracer only falls back to it when OTLPEndpoint is empty.
	//
	// Deprecated: set OTLPEndpoint instead.
	JaegerEndpoint string
	// SampleRate is the fraction of traces to sample. NewTracer replaces
	// values below 0 or above 1 with 1.0.
	SampleRate float64
	// Enabled switches tracing on. When false, NewTracer creates no exporter
	// and every span handed out is a no-op.
	Enabled bool
}

// DefaultConfig returns a default configuration.
//
// The defaults describe a local development setup: tracing is disabled,
// endpoints point at localhost and every trace is sampled.
func DefaultConfig() Config {
	return Config{
		ServiceName:    "knowfoolery",
		ServiceVersion: "0.0.0",
		Environment:    "development",
		OTLPEndpoint:   "http://localhost:4318/v1/traces",
		JaegerEndpoint: "http://localhost:14268/api/traces",
		SampleRate:     1.0, // Sample all traces in development
		Enabled:        false,
	}
}

// Tracer provides distributed tracing functionality.
type Tracer struct {
	// config is the configuration the Tracer was created with.
	config Config
	// provider is the SDK tracer provider; it is left nil when tracing is
	// disabled.
	provider *sdktrace.TracerProvider
	// tracer is the OpenTelemetry tracer used to start spans.
	tracer oteltrace.Tracer
	// enabled mirrors Config.Enabled at construction time.
	enabled bool
}

// NewTracer creates a new Tracer.
func NewTracer(config Config) (*Tracer, error) { if !config.Enabled { return &Tracer{ config: config, tracer: otel.Tracer(config.ServiceName), enabled: false, }, nil } sampleRate := config.SampleRate if sampleRate < 0 || sampleRate > 1 { sampleRate = 1.0 } endpoint := config.OTLPEndpoint if endpoint == "" { endpoint = config.JaegerEndpoint } exporter, err := newOTLPHTTPExporter(endpoint) if err != nil { return nil, fmt.Errorf("create otlp exporter: %w", err) } res, err := resource.New( context.Background(), resource.WithAttributes( semconv.ServiceName(config.ServiceName), semconv.ServiceVersion(config.ServiceVersion), attribute.String("deployment.environment", config.Environment), ), ) if err != nil { return nil, fmt.Errorf("create tracer resource: %w", err) } provider := sdktrace.NewTracerProvider( sdktrace.WithBatcher(exporter), sdktrace.WithSampler(sdktrace.TraceIDRatioBased(sampleRate)), sdktrace.WithResource(res), ) otel.SetTracerProvider(provider) return &Tracer{ config: config, provider: provider, tracer: otel.Tracer(config.ServiceName), enabled: true, }, nil } // Shutdown gracefully shuts down the tracer. func (t *Tracer) Shutdown(ctx context.Context) error { if !t.enabled || t.provider == nil { return nil } if err := t.provider.Shutdown(ctx); err != nil { return fmt.Errorf("shutdown tracer provider: %w", err) } return nil } // StartSpan starts a new span. func (t *Tracer) StartSpan(ctx context.Context, name string) (context.Context, Span) { if !t.enabled || t.tracer == nil { return ctx, &noopSpan{} } spanCtx, span := t.tracer.Start(ctx, name) return spanCtx, &otelSpan{span: span} } // Span represents a tracing span. type Span interface { // End ends the span. End() // SetAttribute sets an attribute on the span. SetAttribute(key string, value interface{}) // RecordError records an error on the span. RecordError(err error) } // noopSpan is a no-op implementation of Span. 
type noopSpan struct{} func (s *noopSpan) End() {} func (s *noopSpan) SetAttribute(key string, value interface{}) {} func (s *noopSpan) RecordError(err error) {} // otelSpan wraps an OpenTelemetry span with the local Span interface. type otelSpan struct { span oteltrace.Span } func (s *otelSpan) End() { s.span.End() } func (s *otelSpan) SetAttribute(key string, value interface{}) { switch v := value.(type) { case string: s.span.SetAttributes(attribute.String(key, v)) case int: s.span.SetAttributes(attribute.Int(key, v)) case int64: s.span.SetAttributes(attribute.Int64(key, v)) case float64: s.span.SetAttributes(attribute.Float64(key, v)) case bool: s.span.SetAttributes(attribute.Bool(key, v)) default: s.span.SetAttributes(attribute.String(key, fmt.Sprint(value))) } } func (s *otelSpan) RecordError(err error) { if err == nil { return } s.span.RecordError(err) s.span.SetStatus(codes.Error, err.Error()) } // TraceServiceOperation traces a service operation. func TraceServiceOperation(ctx context.Context, tracer *Tracer, serviceName, operation string, fn func(context.Context) error) error { ctx, span := tracer.StartSpan(ctx, fmt.Sprintf("%s.%s", serviceName, operation)) defer span.End() err := fn(ctx) if err != nil { span.RecordError(err) span.SetAttribute("error", true) } return err } // TraceDatabaseOperation traces a database operation. func TraceDatabaseOperation(ctx context.Context, tracer *Tracer, operation, table string, fn func(context.Context) error) error { ctx, span := tracer.StartSpan(ctx, fmt.Sprintf("db.%s.%s", operation, table)) defer span.End() span.SetAttribute("db.operation", operation) span.SetAttribute("db.table", table) span.SetAttribute("db.system", "postgresql") err := fn(ctx) if err != nil { span.RecordError(err) } return err } // ConfigFromEnv creates a Config from environment variables. 
func ConfigFromEnv() Config { cfg := DefaultConfig() cfg.Enabled = parseBoolEnv("TRACING_ENABLED", cfg.Enabled) cfg.ServiceName = envutil.String("TRACING_SERVICE_NAME", cfg.ServiceName) cfg.ServiceVersion = envutil.String("TRACING_SERVICE_VERSION", cfg.ServiceVersion) cfg.Environment = envutil.String("TRACING_ENVIRONMENT", cfg.Environment) legacyEndpoint := envutil.String("TRACING_JAEGER_ENDPOINT", "") if legacyEndpoint != "" { cfg.JaegerEndpoint = legacyEndpoint } cfg.OTLPEndpoint = envutil.String("TRACING_OTLP_ENDPOINT", "") if cfg.OTLPEndpoint == "" && legacyEndpoint != "" { cfg.OTLPEndpoint = legacyEndpoint } if cfg.OTLPEndpoint == "" { cfg.OTLPEndpoint = DefaultConfig().OTLPEndpoint } cfg.SampleRate = parseFloatEnv("TRACING_SAMPLE_RATE", cfg.SampleRate) return cfg } func newOTLPHTTPExporter(endpoint string) (sdktrace.SpanExporter, error) { parsed, err := url.Parse(endpoint) if err != nil { return nil, fmt.Errorf("parse endpoint: %w", err) } if parsed.Scheme == "" || parsed.Host == "" { return nil, fmt.Errorf("endpoint must be absolute URL, got %q", endpoint) } if parsed.Scheme != "http" && parsed.Scheme != "https" { return nil, fmt.Errorf("unsupported endpoint scheme %q", parsed.Scheme) } urlPath := parsed.EscapedPath() if urlPath == "" || urlPath == "/" { urlPath = "/v1/traces" } options := []otlptracehttp.Option{ otlptracehttp.WithEndpoint(parsed.Host), otlptracehttp.WithURLPath(urlPath), } if parsed.Scheme == "http" { options = append(options, otlptracehttp.WithInsecure()) } exporter, err := otlptracehttp.New(context.Background(), options...) 
if err != nil { return nil, err } return exporter, nil } func parseBoolEnv(key string, fallback bool) bool { value := envutil.String(key, "") if value == "" { return fallback } parsed, err := strconv.ParseBool(value) if err != nil { return fallback } return parsed } func parseFloatEnv(key string, fallback float64) float64 { value := envutil.String(key, "") if value == "" { return fallback } parsed, err := strconv.ParseFloat(value, 64) if err != nil { return fallback } return parsed }