Added openai classifier

3 months ago · d9c7497acb
parent a14b02ad3c
commit d9c7497acb
7 changed files with 252 additions and 10 deletions
--- a/.env.example
+++ b/.env.example
@ -1,6 +1,11 @@
 GOOGLE_CLIENT_SECRETS_FILE=credentials.json
 GOOGLE_TOKEN_FILE=token.json
 AGENT_API_KEY=change-me
+LLM_API_KEY=
+LLM_MODEL=gpt-4.1-mini
+LLM_BASE_URL=
+LLM_TIMEOUT_SECONDS=20
+LLM_FALLBACK_TO_RULES=false
 GMAIL_SCAN_INTERVAL_MINUTES=5
 GMAIL_QUERY=in:inbox -label:AgentProcessed newer_than:7d
 LOG_LEVEL=INFO
--- a/README.md
+++ b/README.md
@ -3,6 +3,7 @@
 This project runs a small local API service that:

 - scans new Gmail inbox messages
+- classifies emails with an LLM as `LINKEDIN`, `ADVERTISING`, or `OTHER`
 - moves LinkedIn emails to a `LinkedIn` label/folder
 - moves advertising emails to an `Advertising` label/folder
 - exposes a secure availability endpoint powered by Google Calendar free/busy
@ -11,6 +12,7 @@ This project runs a small local API service that:

 - Python 3.11+
 - A Google account
+- An OpenAI-compatible API key for the LLM classifier
 - A Google Cloud project with:
  - Gmail API enabled
  - Google Calendar API enabled
@ -37,6 +39,7 @@ cp .env.example .env
 Edit `.env` and set:

 - `AGENT_API_KEY` to a strong secret for agent-to-agent calls
+- `LLM_API_KEY` to your OpenAI-compatible API key, and optionally `LLM_MODEL` / `LLM_BASE_URL` to override the default model or API endpoint
 - optional scan frequency and Gmail query

 ## 4) Run
@ -79,8 +82,9 @@ If `available` is `true`, there are no busy slots in that range.

 ## Classification behavior

- LinkedIn detection: sender or subject contains `linkedin` (LinkedIn has priority).
- Advertising detection: Gmail promotion category, `List-Unsubscribe`, `Precedence: bulk/list/junk`, common promo keywords, and marketing sender hints.
+- LLM classification is used for each email (`LINKEDIN`, `ADVERTISING`, `OTHER`).
+- LinkedIn has priority over advertising inside the classifier prompt.
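+- Set `LLM_FALLBACK_TO_RULES=true` only if you want the rules-based classifier as a backup when LLM calls fail or `LLM_API_KEY` is not set. When it is `false`, a failed LLM call labels the email `OTHER`, and a missing `LLM_API_KEY` makes the scan raise an error.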
 - Every scanned message gets an `AgentProcessed` label to avoid reprocessing loops.

 ## Notes
--- a/app/config.py
+++ b/app/config.py
@ -18,10 +18,16 @@ class Settings:
    gmail_scan_interval_minutes: int
    gmail_query: str
    agent_api_key: str
+    llm_api_key: str
+    llm_model: str
+    llm_base_url: str | None
+    llm_timeout_seconds: float
+    llm_fallback_to_rules: bool
    log_level: str


 def get_settings() -> Settings:
+    llm_base_url = os.getenv("LLM_BASE_URL", "").strip()
    return Settings(
        google_client_secrets_file=os.getenv("GOOGLE_CLIENT_SECRETS_FILE", "credentials.json"),
        google_token_file=os.getenv("GOOGLE_TOKEN_FILE", "token.json"),
@ -30,5 +36,14 @@ def get_settings() -> Settings:
            "GMAIL_QUERY", "in:inbox -label:AgentProcessed newer_than:7d"
        ),
        agent_api_key=os.getenv("AGENT_API_KEY", ""),
+        llm_api_key=os.getenv("LLM_API_KEY", ""),
+        llm_model=os.getenv("LLM_MODEL", "gpt-4.1-mini"),
+        llm_base_url=llm_base_url or None,
+        llm_timeout_seconds=float(os.getenv("LLM_TIMEOUT_SECONDS", "20")),
+        llm_fallback_to_rules=_as_bool(os.getenv("LLM_FALLBACK_TO_RULES", "false")),
        log_level=os.getenv("LOG_LEVEL", "INFO"),
    )
+
+
+def _as_bool(value: str) -> bool:
+    """Interpret a configuration string as a boolean flag.
+
+    Leading/trailing whitespace and letter case are ignored. Only "1",
+    "true", "yes" and "on" are treated as true; any other text is false.
+    """
+    truthy_tokens = {"1", "true", "yes", "on"}
+    normalized = value.strip().lower()
+    return normalized in truthy_tokens
--- a/app/gmail_agent.py
+++ b/app/gmail_agent.py
@ -5,6 +5,7 @@ from email.utils import parseaddr
 import logging
 from typing import Any

+from app.llm_classifier import LLMEmailClassifier

 METADATA_HEADERS = [
    "From",
@ -49,9 +50,18 @@ class ScanResult:


 class GmailTriageAgent:
-    def __init__(self, gmail_service: Any, query: str) -> None:
+    def __init__(
+        self,
+        gmail_service: Any,
+        query: str,
+        *,
+        classifier: LLMEmailClassifier | None = None,
+        fallback_to_rules: bool = True,
+    ) -> None:
        self.gmail_service = gmail_service
        self.query = query
+        self.classifier = classifier
+        self.fallback_to_rules = fallback_to_rules

    def ensure_labels(self) -> dict[str, str]:
        labels_response = (
@ -134,24 +144,28 @@ class GmailTriageAgent:

            sender = headers.get("from", "")
            subject = headers.get("subject", "")
+            snippet = message.get("snippet", "")
+            list_unsubscribe = headers.get("list-unsubscribe", "")
+            precedence = headers.get("precedence", "")

-            should_linkedin = self._is_linkedin_email(sender=sender, subject=subject)
-            should_advertising = self._is_advertising_email(
+            label = self._classify_email(
+                message_id=message_id,
                sender=sender,
                subject=subject,
-                list_unsubscribe=headers.get("list-unsubscribe", ""),
-                precedence=headers.get("precedence", ""),
+                snippet=snippet,
+                list_unsubscribe=list_unsubscribe,
+                precedence=precedence,
                message_label_ids=label_ids,
            )

            add_labels = [label_by_name["AgentProcessed"]]
            remove_labels = []

-            if should_linkedin:
+            if label == "LINKEDIN":
                add_labels.insert(0, label_by_name["LinkedIn"])
                remove_labels.append("INBOX")
                outcome = "linkedin"
-            elif should_advertising:
+            elif label == "ADVERTISING":
                add_labels.insert(0, label_by_name["Advertising"])
                remove_labels.append("INBOX")
                outcome = "advertising"
@ -177,6 +191,53 @@ class GmailTriageAgent:
            logger.exception("Failed to route message %s", message_id)
            return "failed"

+    def _classify_email(
+        self,
+        *,
+        message_id: str,
+        sender: str,
+        subject: str,
+        snippet: str,
+        list_unsubscribe: str,
+        precedence: str,
+        message_label_ids: set[str],
+    ) -> str:
+        """Pick the routing label for one message.
+
+        The configured classifier is tried first. When there is no
+        classifier, or its call raises, the sender/subject/header rules are
+        consulted only if ``fallback_to_rules`` is enabled; otherwise the
+        message is labelled "OTHER".
+
+        Returns:
+            The classifier's label when it succeeds, else "LINKEDIN",
+            "ADVERTISING" or "OTHER" from the rules / default path.
+        """
+        if self.classifier:
+            try:
+                verdict = self.classifier.classify(
+                    sender=sender,
+                    subject=subject,
+                    snippet=snippet,
+                    list_unsubscribe=list_unsubscribe,
+                    precedence=precedence,
+                    message_label_ids=message_label_ids,
+                )
+                logger.info(
+                    "Message %s classified by LLM as %s (confidence=%.2f)",
+                    message_id,
+                    verdict.label,
+                    verdict.confidence,
+                )
+                return verdict.label
+            except Exception:
+                # Log the failure and continue; the code below decides which
+                # label to use when the LLM result is unavailable.
+                logger.exception("LLM classification failed for %s", message_id)
+
+        if not self.fallback_to_rules:
+            return "OTHER"
+
+        # LinkedIn is tested before advertising, so it wins when both match.
+        if self._is_linkedin_email(sender=sender, subject=subject):
+            return "LINKEDIN"
+
+        looks_like_advertising = self._is_advertising_email(
+            sender=sender,
+            subject=subject,
+            list_unsubscribe=list_unsubscribe,
+            precedence=precedence,
+            message_label_ids=message_label_ids,
+        )
+        return "ADVERTISING" if looks_like_advertising else "OTHER"
+
+
    def _is_linkedin_email(self, sender: str, subject: str) -> bool:
        sender_lower = sender.lower()
        subject_lower = subject.lower()
--- a/app/llm_classifier.py
+++ b/app/llm_classifier.py
@ -0,0 +1,120 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+import json
+import logging
+import re
+
+from openai import OpenAI
+
+logger = logging.getLogger("personal-agent.llm")
+
+ALLOWED_LABELS = {"LINKEDIN", "ADVERTISING", "OTHER"}
+
+SYSTEM_PROMPT = """You classify incoming emails into exactly one label:
+- LINKEDIN: official LinkedIn platform emails (job alerts, invites, network updates, LinkedIn newsletters).
+- ADVERTISING: marketing/promotional/sales emails, newsletters, coupons, deals, brand campaigns.
+- OTHER: anything else.
+
+Rules:
+1) If sender/content clearly belongs to LinkedIn, choose LINKEDIN even if promotional.
+2) If uncertain between ADVERTISING and OTHER, choose OTHER.
+3) Return only JSON with this schema:
+{"label":"LINKEDIN|ADVERTISING|OTHER","confidence":0.0-1.0,"reason":"short reason"}"""
+
+
+@dataclass(frozen=True)
+class LLMClassification:
+    """Immutable result of classifying one email with the LLM."""
+
+    # Label chosen for the email; LLMEmailClassifier.classify replaces any
+    # value outside ALLOWED_LABELS with "OTHER" before building this object.
+    label: str
+    # Model-reported confidence after normalization by _to_confidence.
+    confidence: float
+    # Short explanation taken from the model's reply; may be empty.
+    reason: str
+
+
+class LLMEmailClassifier:
+    """Classify emails by sending their metadata to a chat-completions API."""
+
+    def __init__(
+        self,
+        *,
+        api_key: str,
+        model: str,
+        base_url: str | None = None,
+        timeout_seconds: float = 20.0,
+    ) -> None:
+        """Create the API client used for classification.
+
+        Args:
+            api_key: Credential passed to the client; must be non-empty.
+            model: Model name sent with every completion request.
+            base_url: Optional endpoint override forwarded to the client.
+            timeout_seconds: Value forwarded to the client as its timeout.
+
+        Raises:
+            ValueError: If ``api_key`` is empty.
+        """
+        if not api_key:
+            raise ValueError("LLM API key is required for LLM classification.")
+
+        self.model = model
+        client_options = {
+            "api_key": api_key,
+            "base_url": base_url,
+            "timeout": timeout_seconds,
+        }
+        self.client = OpenAI(**client_options)
+
+    def classify(
+        self,
+        *,
+        sender: str,
+        subject: str,
+        snippet: str,
+        list_unsubscribe: str,
+        precedence: str,
+        message_label_ids: set[str],
+    ) -> LLMClassification:
+        """Ask the model for a label and return the normalized result.
+
+        The email metadata is serialized to JSON and sent as the user
+        message next to ``SYSTEM_PROMPT``. A label outside
+        ``ALLOWED_LABELS`` is logged and replaced with "OTHER".
+
+        Returns:
+            An ``LLMClassification`` holding the label, confidence and reason.
+        """
+        # Only the presence of List-Unsubscribe is sent, not the header value.
+        has_unsubscribe_header = bool(list_unsubscribe.strip())
+        features = {
+            "sender": sender,
+            "subject": subject,
+            "snippet": snippet,
+            "list_unsubscribe_present": has_unsubscribe_header,
+            "precedence": precedence,
+            "gmail_label_ids": sorted(message_label_ids),
+        }
+        conversation = [
+            {"role": "system", "content": SYSTEM_PROMPT},
+            {"role": "user", "content": json.dumps(features, ensure_ascii=True)},
+        ]
+
+        completion = self.client.chat.completions.create(
+            model=self.model,
+            temperature=0,
+            response_format={"type": "json_object"},
+            max_tokens=120,
+            messages=conversation,
+        )
+        # An empty reply is treated as an empty JSON object.
+        raw_reply = completion.choices[0].message.content or "{}"
+        parsed = _parse_json(raw_reply)
+
+        label = str(parsed.get("label", "OTHER")).upper().strip()
+        if label not in ALLOWED_LABELS:
+            logger.warning("Unexpected LLM label '%s', falling back to OTHER.", label)
+            label = "OTHER"
+
+        return LLMClassification(
+            label=label,
+            confidence=_to_confidence(parsed.get("confidence", 0.0)),
+            reason=str(parsed.get("reason", "")).strip(),
+        )
+
+
+def _parse_json(content: str) -> dict:
+    """Extract a JSON object from the model's reply.
+
+    The whole reply is parsed first. If that does not yield a JSON object,
+    the span from the first "{" to the last "}" is tried, which covers
+    replies that wrap the JSON in extra text.
+
+    Returns:
+        The decoded object, or an empty dict when no JSON object can be
+        recovered. A dict is always returned: callers use ``.get`` on the
+        result, and valid non-object JSON (a list, string, number or null)
+        would otherwise make them fail.
+    """
+    if not content:
+        return {}
+
+    try:
+        decoded = json.loads(content)
+    except json.JSONDecodeError:
+        decoded = None
+    if isinstance(decoded, dict):
+        return decoded
+
+    # Fall back to the outermost brace-delimited span inside the reply.
+    match = re.search(r"\{.*\}", content, re.DOTALL)
+    if not match:
+        return {}
+    try:
+        decoded = json.loads(match.group(0))
+    except json.JSONDecodeError:
+        return {}
+    return decoded if isinstance(decoded, dict) else {}
+
+
+def _to_confidence(raw_value: object) -> float:
+    """Coerce the model's confidence value to a float within [0.0, 1.0].
+
+    Values that cannot be converted (wrong type, non-numeric text, integers
+    too large for a float) and NaN are reported as 0.0; every other value is
+    clamped to the range.
+    """
+    import math  # imported locally so the module's import block is untouched
+
+    try:
+        confidence = float(raw_value)
+    except (TypeError, ValueError, OverflowError):
+        return 0.0
+
+    # NaN fails every ordering comparison, so it would slip past the clamp.
+    if math.isnan(confidence):
+        return 0.0
+    return min(1.0, max(0.0, confidence))
--- a/app/main.py
+++ b/app/main.py
@ -13,6 +13,7 @@ from app.calendar_agent import CalendarAvailabilityAgent
 from app.config import get_settings
 from app.gmail_agent import GmailTriageAgent
 from app.google_clients import build_calendar_service, build_gmail_service
+from app.llm_classifier import LLMEmailClassifier

 settings = get_settings()
 logging.basicConfig(level=getattr(logging, settings.log_level.upper(), logging.INFO))
@ -21,6 +22,7 @@ logger = logging.getLogger("personal-agent")
 app = FastAPI(title="Personal Agent", version="0.1.0")
 scheduler: AsyncIOScheduler | None = None
 scan_lock: asyncio.Lock | None = None
+llm_key_warning_logged = False


 class ScanResponse(BaseModel):
@ -73,7 +75,12 @@ def verify_api_key(

 def _run_scan_once(max_results: int) -> ScanResponse:
    gmail_service = build_gmail_service(settings)
-    gmail_agent = GmailTriageAgent(gmail_service=gmail_service, query=settings.gmail_query)
+    gmail_agent = GmailTriageAgent(
+        gmail_service=gmail_service,
+        query=settings.gmail_query,
+        classifier=_build_llm_classifier(),
+        fallback_to_rules=settings.llm_fallback_to_rules,
+    )
    result = gmail_agent.scan_and_route_messages(max_results=max_results)
    return ScanResponse(
        scanned=result.scanned,
@ -84,6 +91,35 @@ def _run_scan_once(max_results: int) -> ScanResponse:
    )


+def _build_llm_classifier() -> LLMEmailClassifier | None:
+    """Build the LLM classifier from settings, or return None to use rules.
+
+    Returns:
+        A configured ``LLMEmailClassifier``, or ``None`` when rules fallback
+        is enabled and either the API key is missing or construction failed.
+
+    Raises:
+        RuntimeError: If the API key is missing and rules fallback is off.
+        Exception: Whatever classifier construction raised, when rules
+            fallback is off.
+    """
+    global llm_key_warning_logged
+
+    fallback_enabled = settings.llm_fallback_to_rules
+
+    if not settings.llm_api_key:
+        if not fallback_enabled:
+            raise RuntimeError(
+                "LLM_API_KEY is required when LLM_FALLBACK_TO_RULES is disabled."
+            )
+        # Warn only once per process instead of on every scan.
+        if not llm_key_warning_logged:
+            logger.warning(
+                "LLM_API_KEY not set. Falling back to rules-based classification."
+            )
+            llm_key_warning_logged = True
+        return None
+
+    try:
+        classifier = LLMEmailClassifier(
+            api_key=settings.llm_api_key,
+            model=settings.llm_model,
+            base_url=settings.llm_base_url,
+            timeout_seconds=settings.llm_timeout_seconds,
+        )
+    except Exception:
+        if not fallback_enabled:
+            raise
+        logger.exception("Could not initialize LLM classifier; using rules fallback.")
+        return None
+    return classifier
+
+
 def _get_scan_lock() -> asyncio.Lock:
    global scan_lock
    if scan_lock is None:
--- a/requirements.txt
+++ b/requirements.txt
@ -3,5 +3,6 @@ fastapi
 google-api-python-client
 google-auth
 google-auth-oauthlib
+openai
 python-dotenv
 uvicorn[standard]