// Provenance: reconstructed from the patch "Added web crawler exercise"
// (commit 1d46b526eed472d2ebdd77ec83d540d37c773f75, author oabrivard,
// Sun, 20 Aug 2023 12:50:35 +0200), which created
// exercise-web-crawler/exercise-web-crawler.go.

// Command exercise-web-crawler concurrently crawls a canned set of pages,
// following every discovered link at most once.
package main

import (
	"fmt"
	"sync"
)

// Crawl state shared by every Crawl goroutine.
var (
	// mu guards visited.
	mu sync.Mutex
	// visited holds every URL that has already been claimed for crawling.
	visited = map[string]bool{}
	// wgCrawl counts in-flight Crawl calls so main can wait for all of them.
	wgCrawl sync.WaitGroup
)

// Fetcher retrieves a page and the links it contains.
type Fetcher interface {
	// Fetch returns the body of URL and
	// a slice of URLs found on that page.
	Fetch(url string) (body string, urls []string, err error)
}

// claim marks url as visited and reports whether this call was the first
// one to do so. It is safe for concurrent use.
func claim(url string) bool {
	mu.Lock()
	defer mu.Unlock()

	if visited[url] {
		return false
	}
	visited[url] = true
	return true
}

// Crawl uses fetcher to recursively crawl
// pages starting with url, to a maximum of depth.
//
// Each link found on a page is crawled in its own goroutine, and a link that
// has already been claimed is not followed again. A fetch error is printed
// and ends the crawl of that branch only.
//
// Every call finishes with wgCrawl.Done, so the caller must call
// wgCrawl.Add(1) before invoking Crawl and use wgCrawl.Wait to block until
// the whole crawl is complete.
func Crawl(url string, depth int, fetcher Fetcher) {
	defer wgCrawl.Done()

	if depth <= 0 {
		return
	}

	// Record url itself as visited. Links are claimed by their parent page,
	// but the start page has no parent, so without this a page linking back
	// to it would cause it to be fetched a second time.
	mu.Lock()
	visited[url] = true
	mu.Unlock()

	body, urls, err := fetcher.Fetch(url)
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Printf("found: %s %q\n", url, body)

	// Links would be crawled with depth-1. When that is zero they would
	// return without fetching, so do not claim them: a claimed-but-unfetched
	// URL could no longer be fetched through a shorter path.
	if depth == 1 {
		return
	}

	for _, u := range urls {
		if !claim(u) {
			continue
		}
		wgCrawl.Add(1)
		go Crawl(u, depth-1, fetcher)
	}
}

// main crawls the canned site from its root and waits for every spawned
// Crawl goroutine to finish before exiting.
func main() {
	wgCrawl.Add(1)
	Crawl("https://golang.org/", 4, fetcher)
	wgCrawl.Wait()
}

// fakeFetcher is Fetcher that returns canned results.
type fakeFetcher map[string]*fakeResult

// fakeResult is the canned body and outgoing links of one page.
type fakeResult struct {
	body string
	urls []string
}

// Fetch returns the canned body and links stored for url, or a
// "not found" error when url is not a key of f.
func (f fakeFetcher) Fetch(url string) (string, []string, error) {
	if res, ok := f[url]; ok {
		return res.body, res.urls, nil
	}
	return "", nil, fmt.Errorf("not found: %s", url)
}

// fetcher is a populated fakeFetcher.
var fetcher = fakeFetcher{
	"https://golang.org/": &fakeResult{
		body: "The Go Programming Language",
		urls: []string{
			"https://golang.org/pkg/",
			"https://golang.org/cmd/",
		},
	},
	"https://golang.org/pkg/": &fakeResult{
		body: "Packages",
		urls: []string{
			"https://golang.org/",
			"https://golang.org/cmd/",
			"https://golang.org/pkg/fmt/",
			"https://golang.org/pkg/os/",
		},
	},
	"https://golang.org/pkg/fmt/": &fakeResult{
		body: "Package fmt",
		urls: []string{
			"https://golang.org/",
			"https://golang.org/pkg/",
		},
	},
	"https://golang.org/pkg/os/": &fakeResult{
		body: "Package os",
		urls: []string{
			"https://golang.org/",
			"https://golang.org/pkg/",
		},
	},
}