diff --git a/README.md b/README.md index 57ceb33..2d8d101 100644 --- a/README.md +++ b/README.md @@ -3,9 +3,10 @@ This project runs a small local API service that: - scans unread emails in the root Gmail inbox -- classifies emails with **Strands** (`LINKEDIN`, `ADVERTISING`, `OTHER`) +- classifies emails with **Strands** (`LINKEDIN`, `ADVERTISING`, `VEILLE_TECHNO`, `OTHER`) - moves LinkedIn emails to a `LinkedIn` label/folder - moves advertising emails to an `Advertising` label/folder +- moves veille techno (technology watch) emails to a `VeilleTechno` label/folder - scans the `Advertising` label and emails you new unsubscribe links (deduplicated) - discovers unsubscribe-ready mailing lists for human review, then auto-unsubscribes selected lists - exposes a secure availability endpoint powered by Google Calendar free/busy @@ -124,7 +125,7 @@ curl -X POST "http://127.0.0.1:8000/unsubscribe/auto-run?max_results=500" \ - Scan scope is always forced to `in:inbox is:unread` (root inbox + unread). - `GMAIL_QUERY` is treated as additional filters (for example `-label:AgentProcessed`). -- Strands classification is used for each email (`LINKEDIN`, `ADVERTISING`, `OTHER`). +- Strands classification is used for each email (`LINKEDIN`, `ADVERTISING`, `VEILLE_TECHNO`, `OTHER`). - LinkedIn has priority over advertising inside the classifier prompt. - Set `LLM_FALLBACK_TO_RULES=true` only if you want rules-based backup when LLM calls fail. - Every scanned message gets an `AgentProcessed` label to avoid reprocessing loops. @@ -150,6 +151,7 @@ curl -X POST "http://127.0.0.1:8000/unsubscribe/auto-run?max_results=500" \ - Gmail "folders" are labels. This agent creates: - `LinkedIn` + - `VeilleTechno` - `Advertising` - `AgentProcessed` -- Messages classified as LinkedIn/Advertising are removed from `INBOX` (moved out of inbox). +- Messages classified as LinkedIn/Advertising/VeilleTechno are removed from `INBOX` (moved out of inbox). 
diff --git a/app/gmail_agent.py b/app/gmail_agent.py index 35887ed..35ad820 100644 --- a/app/gmail_agent.py +++ b/app/gmail_agent.py @@ -45,6 +45,7 @@ class ScanResult: scanned: int linkedin: int advertising: int + veille_techno: int skipped: int failed: int @@ -70,7 +71,7 @@ class GmailTriageAgent: labels = labels_response.get("labels", []) label_by_name = {label["name"]: label["id"] for label in labels} - for required_name in ("LinkedIn", "Advertising", "AgentProcessed"): + for required_name in ("LinkedIn", "Advertising", "AgentProcessed", "VeilleTechno"): if required_name not in label_by_name: created = ( self.gmail_service.users() @@ -102,6 +103,7 @@ class GmailTriageAgent: linkedin = 0 advertising = 0 + veille_techno = 0 skipped = 0 failed = 0 @@ -124,6 +126,8 @@ class GmailTriageAgent: linkedin += 1 elif outcome == "advertising": advertising += 1 + elif outcome == "veille_techno": + veille_techno += 1 elif outcome == "skipped": skipped += 1 else: @@ -133,6 +137,7 @@ class GmailTriageAgent: scanned=len(inbox_messages), linkedin=linkedin, advertising=advertising, + veille_techno=veille_techno, skipped=skipped, failed=failed, ) @@ -227,6 +232,10 @@ class GmailTriageAgent: add_labels.insert(0, label_by_name["Advertising"]) remove_labels.append("INBOX") outcome = "advertising" + elif label == "VEILLE_TECHNO": + add_labels.insert(0, label_by_name["VeilleTechno"]) + remove_labels.append("INBOX") + outcome = "veille_techno" else: outcome = "skipped" @@ -293,6 +302,8 @@ class GmailTriageAgent: message_label_ids=message_label_ids, ): return "ADVERTISING" + if self._is_veille_techno_email(sender=sender, subject=subject): + return "VEILLE_TECHNO" return "OTHER" @@ -331,3 +342,12 @@ class GmailTriageAgent: return True return any(hint in sender_lower for hint in AD_SENDER_HINTS) + + def _is_veille_techno_email(self, sender: str, subject: str) -> bool: + sender_lower = sender.lower() + subject_lower = subject.lower() + + if "cybernetica" in sender_lower or "cybernetica" in 
subject_lower: + return True + + return False \ No newline at end of file diff --git a/app/main.py b/app/main.py index a60de5f..20ea8a3 100644 --- a/app/main.py +++ b/app/main.py @@ -36,6 +36,7 @@ class ScanResponse(BaseModel): scanned: int linkedin: int advertising: int + veille_techno: int skipped: int failed: int @@ -154,6 +155,7 @@ def _run_scan_once(max_results: int) -> ScanResponse: scanned=result.scanned, linkedin=result.linkedin, advertising=result.advertising, + veille_techno=result.veille_techno, skipped=result.skipped, failed=result.failed, ) diff --git a/app/strands_classifier.py b/app/strands_classifier.py index 4dd5b02..66eb450 100644 --- a/app/strands_classifier.py +++ b/app/strands_classifier.py @@ -10,18 +10,19 @@ from strands.models.openai import OpenAIModel logger = logging.getLogger("personal-agent.strands") -ALLOWED_LABELS = {"LINKEDIN", "ADVERTISING", "OTHER"} +ALLOWED_LABELS = {"LINKEDIN", "ADVERTISING", "VEILLE_TECHNO", "OTHER"} SYSTEM_PROMPT = """You classify incoming emails into exactly one label: - LINKEDIN: official LinkedIn platform emails (job alerts, invites, network updates, LinkedIn newsletters). - ADVERTISING: marketing/promotional/sales emails, newsletters, coupons, deals, brand campaigns. Do not label as ADVERTISING if the email is purely transactional (e.g. order confirmation, password reset) even if it contains some marketing language. Also do not label as ADVERTISING if the sender is Cybernetica. But if the sender is Cybernetica and the content is clearly promotional (e.g. "Check out our new product"), then label as ADVERTISING. And if the sender is Castorama and the content is about Communauté d'entraide, the label should be ADVERTISING. +- VEILLE_TECHNO: Cybernetica emails that are clearly about technology watch, sharing interesting articles, insights, trends, etc. without a promotional angle. - OTHER: anything else. Rules: 1) If sender/content clearly belongs to LinkedIn, choose LINKEDIN even if promotional. 
2) If uncertain between ADVERTISING and OTHER, choose OTHER. 3) Return only JSON with this schema: -{"label":"LINKEDIN|ADVERTISING|OTHER","confidence":0.0-1.0,"reason":"short reason"}""" +{"label":"LINKEDIN|ADVERTISING|VEILLE_TECHNO|OTHER","confidence":0.0-1.0,"reason":"short reason"}""" @dataclass(frozen=True)