Browse Source

Initial commit, concurrency is set and ready

master
Martins Eglitis 5 months ago
commit
3dd5c1e6d4
3 changed files with 74 additions and 0 deletions
  1. +3
    -0
      .gitignore
  2. +3
    -0
      README.md
  3. +68
    -0
      crawler.go

+ 3
- 0
.gitignore View File

@@ -0,0 +1,3 @@
.idea/

*.html

+ 3
- 0
README.md View File

@@ -0,0 +1,3 @@
# crawler

A simple concurrent web crawler written in Go.

+ 68
- 0
crawler.go View File

@@ -0,0 +1,68 @@
package main

import (
"flag"
"fmt"
"io/ioutil"
"net/http"
"sync"
)

// request is a worker loop. It receives task IDs from channel until the
// channel is closed and, for each one, fetches "<url>/<task>" and prints the
// size of the response body.
//
// worker is the index of this worker; it is currently unused. wg is marked
// done exactly once when the loop ends, so the caller must have accounted for
// this worker with wg.Add beforehand. url points at the base URL to crawl.
func request(channel chan int, worker int, wg *sync.WaitGroup, url *string) {
	// Deferred so the wait group is released on every exit path; otherwise a
	// panic inside the loop would leave the caller blocked in wg.Wait forever.
	defer wg.Done()

	// Ranging over the channel keeps the worker alive until the producer
	// closes it.
	for task := range channel {
		fetchAndReport(fmt.Sprintf("%s/%v", *url, task))
	}
}

// fetchAndReport performs a single GET request against target and prints the
// number of bytes in the response body, or the error that prevented it.
//
// It is a separate function so that the deferred Body.Close runs after every
// request instead of piling up until the worker loop finishes.
func fetchAndReport(target string) {
	res, err := http.DefaultClient.Get(target)
	if err != nil {
		// res is nil when Get fails, so it must not be touched past this point.
		fmt.Printf("error occured : %v \n", err)
		return
	}
	defer res.Body.Close()

	body, err := ioutil.ReadAll(res.Body)
	if err != nil {
		fmt.Printf("unable to parse %v : %v \n", target, err)
		return
	}

	fmt.Printf("number of bytes in body : %v \n", len(body))
}

// main parses the command-line flags, starts -concurrency worker goroutines
// and hands them every ID in the half-open range [min-id, max-id). It returns
// once the workers have processed all tasks.
func main() {
	url := flag.String("url", "http://localhost/tests", "The URL that should be crawled")
	minID := flag.Int("min-id", 0, "The minimum ID value")
	maxID := flag.Int("max-id", 20, "The maximum ID value")
	concurrency := flag.Int("concurrency", 5, "The number of concurrent tasks")
	flag.Parse()

	// At least one worker is required: a negative value would make wg.Add
	// panic, and zero workers would leave nobody to receive the tasks.
	if *concurrency < 1 {
		fmt.Fprintf(flag.CommandLine.Output(), "concurrency must be at least 1, got %d\n", *concurrency)
		flag.Usage()
		return
	}

	// Unbuffered task channel. No buffer is needed because the workers are
	// already receiving when the tasks are sent, and it avoids sizing a buffer
	// from max-id - min-id, which panics when that difference is negative.
	channel := make(chan int)

	// The wait group counts the workers that are still running.
	var wg sync.WaitGroup
	wg.Add(*concurrency)

	// Start the workers before sending any task: sends on an unbuffered
	// channel block until a worker is ready to receive.
	for worker := 0; worker < *concurrency; worker++ {
		go request(channel, worker, &wg, url)
	}

	// Hand out the tasks. An empty or inverted ID range sends nothing.
	for task := *minID; task < *maxID; task++ {
		channel <- task
	}

	// Closing the channel ends each worker's range loop. It must happen only
	// after the last send, because sending on a closed channel panics.
	close(channel)

	// Block until every worker has called wg.Done.
	wg.Wait()
}

Loading…
Cancel
Save