You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

302 lines
7.4 KiB
Go

// Package tracing provides distributed tracing for the KnowFoolery application.
package tracing
import (
"context"
"fmt"
"net/url"
"strconv"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/codes"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp"
"go.opentelemetry.io/otel/sdk/resource"
sdktrace "go.opentelemetry.io/otel/sdk/trace"
semconv "go.opentelemetry.io/otel/semconv/v1.26.0"
oteltrace "go.opentelemetry.io/otel/trace"
"knowfoolery/backend/shared/infra/utils/envutil"
)
// Config holds the configuration for the tracer.
//
// A Config is consumed by NewTracer; DefaultConfig and ConfigFromEnv produce
// ready-to-use values.
type Config struct {
// ServiceName is reported as the service name resource attribute and is
// also used as the name of the tracer obtained from the otel package.
ServiceName string
// ServiceVersion is reported as the service version resource attribute.
ServiceVersion string
// Environment is reported as the "deployment.environment" resource attribute.
Environment string
// OTLPEndpoint is the URL spans are exported to. It must be an absolute
// http or https URL; an empty or "/" path is replaced by "/v1/traces".
OTLPEndpoint string
// JaegerEndpoint is a deprecated legacy alias kept for backward compatibility.
// NewTracer only falls back to it when OTLPEndpoint is empty.
JaegerEndpoint string
// SampleRate is the trace ID ratio handed to the sampler. NewTracer replaces
// values below 0 or above 1 with 1.0.
SampleRate float64
// Enabled switches tracing on. When false, NewTracer creates no exporter or
// provider and the resulting Tracer hands out no-op spans.
Enabled bool
}
// DefaultConfig returns the baseline configuration: tracing disabled, every
// trace sampled, and both endpoints pointing at localhost.
func DefaultConfig() Config {
	var cfg Config

	// Service identity.
	cfg.ServiceName = "knowfoolery"
	cfg.ServiceVersion = "0.0.0"
	cfg.Environment = "development"

	// Export targets.
	cfg.OTLPEndpoint = "http://localhost:4318/v1/traces"
	cfg.JaegerEndpoint = "http://localhost:14268/api/traces"

	// Sample all traces in development; tracing itself stays off until enabled.
	cfg.SampleRate = 1.0
	cfg.Enabled = false

	return cfg
}
// Tracer provides distributed tracing functionality.
//
// Instances are created with NewTracer. A disabled Tracer is still usable:
// StartSpan returns no-op spans and Shutdown does nothing.
type Tracer struct {
// config is the Config that was passed to NewTracer.
config Config
// provider is the SDK tracer provider; it is left nil when tracing is disabled.
provider *sdktrace.TracerProvider
// tracer is obtained from otel.Tracer using the configured service name.
tracer oteltrace.Tracer
// enabled records whether NewTracer set up a provider and exporter.
enabled bool
}
// NewTracer creates a new Tracer from config.
//
// When config.Enabled is false no exporter or provider is created and the
// returned Tracer only hands out no-op spans.
//
// When enabled, NewTracer builds a resource describing the service, creates an
// OTLP/HTTP exporter for config.OTLPEndpoint (falling back to
// config.JaegerEndpoint when that is empty), wires both into a batching tracer
// provider sampled at config.SampleRate, and installs that provider globally
// via otel.SetTracerProvider.
//
// A SampleRate outside [0, 1], including NaN, is replaced by 1.0.
//
// An error is returned when the resource or the exporter cannot be created.
func NewTracer(config Config) (*Tracer, error) {
	if !config.Enabled {
		return &Tracer{
			config:  config,
			tracer:  otel.Tracer(config.ServiceName),
			enabled: false,
		}, nil
	}

	// Written as a negated range check on purpose: NaN fails every comparison,
	// so "rate < 0 || rate > 1" would let it through to the sampler.
	sampleRate := config.SampleRate
	if !(sampleRate >= 0 && sampleRate <= 1) {
		sampleRate = 1.0
	}

	endpoint := config.OTLPEndpoint
	if endpoint == "" {
		endpoint = config.JaegerEndpoint
	}

	// Build the resource before the exporter so that a resource failure cannot
	// leave an already-created exporter behind without a Shutdown call.
	res, err := resource.New(
		context.Background(),
		resource.WithAttributes(
			semconv.ServiceName(config.ServiceName),
			semconv.ServiceVersion(config.ServiceVersion),
			attribute.String("deployment.environment", config.Environment),
		),
	)
	if err != nil {
		return nil, fmt.Errorf("create tracer resource: %w", err)
	}

	exporter, err := newOTLPHTTPExporter(endpoint)
	if err != nil {
		return nil, fmt.Errorf("create otlp exporter: %w", err)
	}

	provider := sdktrace.NewTracerProvider(
		sdktrace.WithBatcher(exporter),
		sdktrace.WithSampler(sdktrace.TraceIDRatioBased(sampleRate)),
		sdktrace.WithResource(res),
	)
	otel.SetTracerProvider(provider)

	return &Tracer{
		config:   config,
		provider: provider,
		tracer:   otel.Tracer(config.ServiceName),
		enabled:  true,
	}, nil
}
// Shutdown shuts down the underlying tracer provider using ctx.
//
// It is a no-op returning nil when tracing is disabled or no provider was
// created. A provider failure is returned wrapped.
func (t *Tracer) Shutdown(ctx context.Context) error {
	active := t.enabled && t.provider != nil
	if !active {
		return nil
	}

	err := t.provider.Shutdown(ctx)
	if err == nil {
		return nil
	}
	return fmt.Errorf("shutdown tracer provider: %w", err)
}
// StartSpan starts a new span called name as a child of any span carried by
// ctx and returns the derived context together with the span.
//
// When the receiver is nil, tracing is disabled, or no tracer is configured,
// ctx is returned unchanged along with a no-op span, so callers can always
// defer span.End() without checking whether tracing is set up.
func (t *Tracer) StartSpan(ctx context.Context, name string) (context.Context, Span) {
	// The nil check keeps helpers that accept a *Tracer from panicking when
	// they are handed a nil tracer.
	if t == nil || !t.enabled || t.tracer == nil {
		return ctx, &noopSpan{}
	}

	spanCtx, span := t.tracer.Start(ctx, name)
	return spanCtx, &otelSpan{span: span}
}
// Span represents a tracing span.
//
// It is the package's own abstraction over a span; this file provides a no-op
// implementation (noopSpan) and an OpenTelemetry-backed one (otelSpan).
type Span interface {
// End ends the span.
End()
// SetAttribute sets an attribute on the span. The OpenTelemetry-backed
// implementation stores string, int, int64, float64 and bool values with
// their native type and any other value as its fmt.Sprint text.
SetAttribute(key string, value interface{})
// RecordError records an error on the span. The OpenTelemetry-backed
// implementation ignores a nil error and otherwise also sets the span
// status to error.
RecordError(err error)
}
// noopSpan is the Span handed out when tracing is unavailable; every method
// accepts its arguments and does nothing.
type noopSpan struct{}

// End does nothing.
func (*noopSpan) End() {}

// SetAttribute discards the attribute.
func (*noopSpan) SetAttribute(string, interface{}) {}

// RecordError discards the error.
func (*noopSpan) RecordError(error) {}
// otelSpan wraps an OpenTelemetry span with the local Span interface.
// StartSpan returns it whenever tracing is enabled.
type otelSpan struct {
// span is the wrapped OpenTelemetry span that every method delegates to.
span oteltrace.Span
}
// End ends the wrapped OpenTelemetry span.
func (s *otelSpan) End() {
s.span.End()
}
// SetAttribute sets key to value on the wrapped span.
//
// string, int, int64, float64 and bool values keep their native attribute
// type; any other value is stored as the string produced by fmt.Sprint.
func (s *otelSpan) SetAttribute(key string, value interface{}) {
	var kv attribute.KeyValue

	switch typed := value.(type) {
	case bool:
		kv = attribute.Bool(key, typed)
	case string:
		kv = attribute.String(key, typed)
	case float64:
		kv = attribute.Float64(key, typed)
	case int64:
		kv = attribute.Int64(key, typed)
	case int:
		kv = attribute.Int(key, typed)
	default:
		// No native attribute type: fall back to the textual form.
		kv = attribute.String(key, fmt.Sprint(typed))
	}

	s.span.SetAttributes(kv)
}
// RecordError records err on the wrapped span and sets the span status to
// error, using err's message as the status description. A nil err is ignored.
func (s *otelSpan) RecordError(err error) {
	if err != nil {
		s.span.RecordError(err)
		s.span.SetStatus(codes.Error, err.Error())
	}
}
// TraceServiceOperation runs fn inside a span named "<serviceName>.<operation>".
//
// fn receives the context returned by StartSpan. When fn fails, the error is
// recorded on the span, the "error" attribute is set to true, and the error is
// returned unchanged. The span is always ended before returning.
func TraceServiceOperation(
	ctx context.Context,
	tracer *Tracer,
	serviceName, operation string,
	fn func(context.Context) error,
) error {
	spanName := serviceName + "." + operation

	spanCtx, span := tracer.StartSpan(ctx, spanName)
	defer span.End()

	if opErr := fn(spanCtx); opErr != nil {
		span.RecordError(opErr)
		span.SetAttribute("error", true)
		return opErr
	}
	return nil
}
// TraceDatabaseOperation runs fn inside a span named "db.<operation>.<table>".
//
// Before fn runs, the span is tagged with "db.operation", "db.table" and a
// fixed "db.system" of "postgresql". fn receives the context returned by
// StartSpan. When fn fails, the error is recorded on the span and returned
// unchanged. The span is always ended before returning.
func TraceDatabaseOperation(
	ctx context.Context,
	tracer *Tracer,
	operation, table string,
	fn func(context.Context) error,
) error {
	spanCtx, span := tracer.StartSpan(ctx, "db."+operation+"."+table)
	defer span.End()

	// Applied in a fixed order so the span is tagged the same way every time.
	tags := [...]struct{ key, value string }{
		{"db.operation", operation},
		{"db.table", table},
		{"db.system", "postgresql"},
	}
	for _, tag := range tags {
		span.SetAttribute(tag.key, tag.value)
	}

	if opErr := fn(spanCtx); opErr != nil {
		span.RecordError(opErr)
		return opErr
	}
	return nil
}
// ConfigFromEnv creates a Config from environment variables, starting from
// DefaultConfig and overriding each field from its TRACING_* variable.
//
// The OTLP endpoint is resolved in order of precedence:
// TRACING_OTLP_ENDPOINT, then the legacy TRACING_JAEGER_ENDPOINT, then the
// default OTLP endpoint. A non-empty TRACING_JAEGER_ENDPOINT also overrides
// JaegerEndpoint itself.
func ConfigFromEnv() Config {
	cfg := DefaultConfig()
	// Remember the default before the field is overwritten below.
	defaultOTLP := cfg.OTLPEndpoint

	cfg.Enabled = parseBoolEnv("TRACING_ENABLED", cfg.Enabled)
	cfg.ServiceName = envutil.String("TRACING_SERVICE_NAME", cfg.ServiceName)
	cfg.ServiceVersion = envutil.String("TRACING_SERVICE_VERSION", cfg.ServiceVersion)
	cfg.Environment = envutil.String("TRACING_ENVIRONMENT", cfg.Environment)

	jaeger := envutil.String("TRACING_JAEGER_ENDPOINT", "")
	if jaeger != "" {
		cfg.JaegerEndpoint = jaeger
	}

	switch otlp := envutil.String("TRACING_OTLP_ENDPOINT", ""); {
	case otlp != "":
		cfg.OTLPEndpoint = otlp
	case jaeger != "":
		cfg.OTLPEndpoint = jaeger
	default:
		cfg.OTLPEndpoint = defaultOTLP
	}

	cfg.SampleRate = parseFloatEnv("TRACING_SAMPLE_RATE", cfg.SampleRate)
	return cfg
}
// newOTLPHTTPExporter builds an OTLP/HTTP span exporter for endpoint.
//
// endpoint must be an absolute URL with an http or https scheme. Its host is
// used as the exporter endpoint and its escaped path as the URL path, with an
// empty or "/" path replaced by "/v1/traces". An http scheme selects an
// insecure connection.
//
// An error is returned when endpoint cannot be parsed, is not absolute, uses
// another scheme, or when the exporter itself cannot be created.
func newOTLPHTTPExporter(endpoint string) (sdktrace.SpanExporter, error) {
	target, err := url.Parse(endpoint)
	// Cases are evaluated in order, so target is only dereferenced once the
	// parse is known to have succeeded.
	switch {
	case err != nil:
		return nil, fmt.Errorf("parse endpoint: %w", err)
	case target.Scheme == "" || target.Host == "":
		return nil, fmt.Errorf("endpoint must be absolute URL, got %q", endpoint)
	case target.Scheme != "http" && target.Scheme != "https":
		return nil, fmt.Errorf("unsupported endpoint scheme %q", target.Scheme)
	}

	tracePath := target.EscapedPath()
	switch tracePath {
	case "", "/":
		tracePath = "/v1/traces"
	}

	opts := make([]otlptracehttp.Option, 0, 3)
	opts = append(opts,
		otlptracehttp.WithEndpoint(target.Host),
		otlptracehttp.WithURLPath(tracePath),
	)
	if target.Scheme == "http" {
		opts = append(opts, otlptracehttp.WithInsecure())
	}

	created, err := otlptracehttp.New(context.Background(), opts...)
	if err != nil {
		// Return an untyped nil so callers never see a non-nil interface
		// wrapping a nil exporter.
		return nil, err
	}
	return created, nil
}
// parseBoolEnv looks key up through envutil.String and parses the result with
// strconv.ParseBool. fallback is returned when the value is empty or is not a
// valid boolean.
func parseBoolEnv(key string, fallback bool) bool {
	raw := envutil.String(key, "")
	if raw != "" {
		if flag, err := strconv.ParseBool(raw); err == nil {
			return flag
		}
	}
	return fallback
}
// parseFloatEnv looks key up through envutil.String and parses the result as
// a 64-bit float. fallback is returned when the value is empty or cannot be
// parsed.
func parseFloatEnv(key string, fallback float64) float64 {
	raw := envutil.String(key, "")
	if raw != "" {
		if number, err := strconv.ParseFloat(raw, 64); err == nil {
			return number
		}
	}
	return fallback
}