diff --git a/backend/src/services/llm/schema.rs b/backend/src/services/llm/schema.rs
index f137b92..17a244d 100644
--- a/backend/src/services/llm/schema.rs
+++ b/backend/src/services/llm/schema.rs
@@ -107,6 +107,36 @@ pub fn build_classification_schema() -> Value {
     })
 }
 
+/// Build the JSON Schema for the LLM link-extraction response: an object with one required `urls` array.
+pub fn build_link_extraction_schema() -> Value {
+    serde_json::json!({
+        "type": "object",
+        "properties": {
+            "urls": {
+                "type": "array",
+                "items": { "type": "string" } // every entry is a plain string
+            }
+        },
+        "required": ["urls"],
+        "additionalProperties": false // keys other than `urls` are not allowed
+    })
+}
+
+/// Build the JSON Schema for the LLM article-extraction response: four required fields, no extras.
+pub fn build_article_extraction_schema() -> Value {
+    serde_json::json!({
+        "type": "object",
+        "properties": { // `published_date` is a plain string; an empty string means the date was not found
+            "title": { "type": "string", "description": "Article title" },
+            "published_date": { "type": "string", "description": "ISO 8601 date or empty string if not found" },
+            "body_text": { "type": "string", "description": "Main article content" },
+            "is_error_page": { "type": "boolean", "description": "True if this is an error/404 page" }
+        },
+        "required": ["title", "published_date", "body_text", "is_error_page"], // all four properties
+        "additionalProperties": false
+    })
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -299,4 +329,24 @@ mod tests {
         assert_eq!(assignments["items"]["additionalProperties"], false);
         assert_eq!(schema["additionalProperties"], false);
     }
+
+    #[test]
+    fn link_extraction_schema_has_urls_array() {
+        let schema = build_link_extraction_schema();
+        assert_eq!(schema["properties"]["urls"]["type"], "array"); // `urls` is declared as an array
+        assert_eq!(schema["additionalProperties"], false); // undeclared keys are rejected
+    }
+
+    #[test]
+    fn article_extraction_schema_strict_mode_compatible() {
+        let schema = build_article_extraction_schema();
+        let props = schema["properties"].as_object().unwrap(); // the builder always emits an object here
+        assert!(props.contains_key("title")); // each of the four response fields must be declared
+        assert!(props.contains_key("published_date"));
+        assert!(props.contains_key("body_text"));
+        assert!(props.contains_key("is_error_page"));
+        assert_eq!(schema["additionalProperties"], false);
+        // `published_date` must be typed as a plain string (not a union type) for OpenAI strict mode.
+        assert_eq!(props["published_date"]["type"], "string");
+    }
 }
diff --git a/backend/src/services/prompts.rs b/backend/src/services/prompts.rs
index 6d13d47..5659664 100644
--- a/backend/src/services/prompts.rs
+++ b/backend/src/services/prompts.rs
@@ -156,6 +156,55 @@ pub fn build_rewrite_prompt(
     (system_prompt, user_prompt)
 }
 
+/// Build the `(system_prompt, user_prompt)` pair asking the LLM to pick article URLs out of a page's HTML.
+pub fn build_link_extraction_prompt(head_html: &str, body_html: &str) -> (String, String) {
+    let system_prompt = // fixed French instruction text
+        "Tu es un assistant qui analyse des pages web. \
+         Tu dois identifier les liens vers des articles d'actualite. \
+         Reponds uniquement au format JSON demande."
+            .to_string();
+
+    let body_truncated: String = body_html.chars().take(8000).collect(); // at most 8000 chars; char-based, so no UTF-8 sequence is split
+
+    let user_prompt = format!( // head block, body excerpt, then the extraction instructions
+        "Voici le contenu HTML d'une page de blog ou de site d'actualites.\n\n\
+         <head>\n{head}\n</head>\n\n\
+         <body (extrait)>\n{body}\n</body>\n\n\
+         Extrais UNIQUEMENT les URLs qui pointent vers des articles \
+         (pas les liens de navigation, tags, categories, login, pages statiques, etc.).\n\
+         Retourne les URLs completes dans le format JSON demande.",
+        head = head_html, // inserted as given; only the body is truncated
+        body = body_truncated,
+    );
+
+    (system_prompt, user_prompt)
+}
+
+/// Build the `(system_prompt, user_prompt)` pair asking the LLM for title, date, body and error-page flag.
+pub fn build_article_extraction_prompt(head_html: &str, body_text: &str) -> (String, String) {
+    let system_prompt = // fixed French instruction text
+        "Tu es un assistant qui analyse des articles web. \
+         Tu dois extraire les informations structurees de l'article. \
+         Reponds uniquement au format JSON demande."
+            .to_string();
+
+    let user_prompt = format!( // head block, page text, then one bullet per requested field
+        "Voici le contenu d'une page web.\n\n\
+         <head>\n{head}\n</head>\n\n\
+         Contenu textuel de la page :\n{body}\n\n\
+         Extrais les informations suivantes :\n\
+         - title : le titre de l'article\n\
+         - published_date : la date de publication au format ISO 8601 (YYYY-MM-DDTHH:MM:SSZ), \
+         ou une chaine vide si introuvable\n\
+         - body_text : le contenu principal de l'article (pas la navigation, pas les pubs)\n\
+         - is_error_page : true si c'est une page d'erreur/404, false sinon",
+        head = head_html, // inserted as given
+        body = body_text, // inserted in full; this function does no truncation
+    );
+
+    (system_prompt, user_prompt)
+}
+
 /// Build a prompt for classifying scraped articles into categories.
 ///
 /// # Arguments
@@ -462,4 +511,27 @@ mod tests {
         let (_, user_prompt) = build_search_prompt(&settings, &sources, date, &[], None);
         assert!(user_prompt.contains("exactement"));
     }
+
+    #[test]
+    fn link_extraction_prompt_includes_html() {
+        let (sys, user) = build_link_extraction_prompt("<title>Blog</title>", "<a href='/post'>P</a>");
+        assert!(user.contains("<title>Blog</title>")); // head HTML is embedded unchanged
+        assert!(user.contains("articles")); // the task wording targets articles
+        assert!(sys.contains("liens")); // the French system prompt mentions links
+    }
+
+    #[test]
+    fn link_extraction_prompt_truncates_body() {
+        let long_body = "x".repeat(20000); // far above the 8000-char cap applied to the body
+        let (_, user) = build_link_extraction_prompt("", &long_body);
+        assert!(user.len() < 15000); // 8000 ASCII bytes plus the fixed template text stays below this bound
+    }
+
+    #[test]
+    fn article_extraction_prompt_includes_content() {
+        let (_, user) = build_article_extraction_prompt("<meta name='date'>", "Article body here");
+        assert!(user.contains("Article body here")); // page text is embedded unchanged
+        assert!(user.contains("published_date")); // requested field names appear in the instructions
+        assert!(user.contains("is_error_page"));
+    }
 }