@ -50,8 +50,6 @@ pub struct ScrapedContent {
pub is_soft_404 : bool ,
pub is_soft_404 : bool ,
/// Final URL after following redirects.
/// Final URL after following redirects.
pub url : String ,
pub url : String ,
/// Raw <head> section from the HTML, preserved for LLM extraction.
pub head_html : String ,
}
}
/// Build a `reqwest::Client` configured for scraping.
/// Build a `reqwest::Client` configured for scraping.
@ -144,7 +142,6 @@ pub async fn scrape_url(
body_text : String ::new ( ) ,
body_text : String ::new ( ) ,
is_soft_404 : false ,
is_soft_404 : false ,
url : response . url ( ) . to_string ( ) ,
url : response . url ( ) . to_string ( ) ,
head_html : String ::new ( ) ,
} ) ;
} ) ;
}
}
@ -181,13 +178,6 @@ pub async fn scrape_url(
let html_text = String ::from_utf8_lossy ( & bytes ) ;
let html_text = String ::from_utf8_lossy ( & bytes ) ;
// Extract the raw <head> section for potential LLM use.
//
// `start` is the byte offset of the first "<head" occurrence, or 0 when
// there is none. The closing tag is searched for only in the suffix that
// begins at `start`: a "</head>" that appears earlier in the document
// (e.g. inside a comment or inline script) would otherwise produce
// `end < start` and make the slice below panic on untrusted HTML.
// When no closing tag follows `start`, the result is an empty string.
let head_html = {
    const HEAD_CLOSE: &str = "</head>";
    let start = html_text.find("<head").unwrap_or(0);
    let end = html_text[start..]
        .find(HEAD_CLOSE)
        // Offsets from `find` on the suffix are relative to `start`.
        .map_or(start, |offset| start + offset + HEAD_CLOSE.len());
    html_text[start..end].to_string()
};
let document = Html ::parse_document ( & html_text ) ;
let document = Html ::parse_document ( & html_text ) ;
// Extract page title
// Extract page title
@ -214,7 +204,6 @@ pub async fn scrape_url(
body_text ,
body_text ,
is_soft_404 ,
is_soft_404 ,
url : final_url . to_string ( ) ,
url : final_url . to_string ( ) ,
head_html ,
} )
} )
}
}