feat: fan out scrapes over time, support different scrape intervals
This should help avoid rate limits on the upstream API, and lets admins tune update latency manually.
This commit is contained in:
parent
ef19ed40f2
commit
738d43adea
28
main/main.go
28
main/main.go
|
@ -9,6 +9,7 @@ import (
|
|||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"git.dvdrw.dev/nsmarter/scraper/scraper"
|
||||
)
|
||||
|
@ -24,6 +25,7 @@ var chunkSize = 5
|
|||
var apiEndpoint string = "https://online.nsmart.rs/publicapi/v1/announcement/announcement.php"
|
||||
var apiKey string
|
||||
var limitQuerySize = 0
|
||||
var timeoutPeriod = 180
|
||||
var fillerUrl string = "localhost"
|
||||
|
||||
func parseEnvars() {
|
||||
|
@ -41,6 +43,13 @@ func parseEnvars() {
|
|||
Log.Printf("WARN: Invalid value for CHUNK_SIZE. Falling back to default value (%v)\n", defaultChunkSize)
|
||||
chunkSize = defaultChunkSize
|
||||
}
|
||||
case "TIMEOUT_PERIOD":
|
||||
var err error
|
||||
timeoutPeriod, err = strconv.Atoi(pair[1])
|
||||
if err != nil {
|
||||
Log.Printf("WARN: Invalid value for TIMEOUT_PERIOD. Falling back to default value (%v)\n", 180)
|
||||
timeoutPeriod = 180
|
||||
}
|
||||
case "LIMIT_QUERY_SIZE":
|
||||
var err error
|
||||
limitQuerySize, err = strconv.Atoi(pair[1])
|
||||
|
@ -86,16 +95,19 @@ func main() {
|
|||
}
|
||||
|
||||
results := make(chan []scraper.ScrapeResult, 200)
|
||||
for _, chunk := range stationChunks {
|
||||
go scraper.ScheduleScrape(chunk,
|
||||
results,
|
||||
scraper.ApiConfig{Endpoint: apiEndpoint,
|
||||
Key: apiKey})
|
||||
}
|
||||
go func() {
|
||||
for _, chunk := range stationChunks {
|
||||
go scraper.ScheduleScrape(chunk,
|
||||
results,
|
||||
scraper.ApiConfig{Endpoint: apiEndpoint,
|
||||
Key: apiKey,
|
||||
Timeout: int64(timeoutPeriod)})
|
||||
// Fan out scrapes over time so as to avoid upstream rate limits
|
||||
time.Sleep(time.Millisecond * 100)
|
||||
}
|
||||
}()
|
||||
|
||||
for r := range results {
|
||||
fmt.Printf("Received data: %#v\n", r)
|
||||
|
||||
json, err := json.Marshal(r)
|
||||
if err != nil {
|
||||
fmt.Print("Couldn't serialise struct to JSON: ", err)
|
||||
|
|
|
@ -60,6 +60,7 @@ func scrapeRange(s []Station) string {
|
|||
|
||||
// ApiConfig holds the settings needed to query the upstream announcement API.
type ApiConfig struct {
	// Endpoint is the API URL to scrape; Key authenticates the request.
	Endpoint, Key string
	// Timeout is the delay, in seconds, between successive scrapes of a chunk.
	Timeout int64
}
|
||||
|
||||
func grabData(stations []Station, c ApiConfig) (map[string][]map[string]interface{}, error) {
|
||||
|
@ -296,6 +297,7 @@ func ScheduleScrape(chunk []Station, c chan []ScrapeResult, a ApiConfig) {
|
|||
}
|
||||
|
||||
c <- r
|
||||
time.Sleep(time.Minute * 3)
|
||||
duration := time.Duration(a.Timeout) * time.Second
|
||||
time.Sleep(duration)
|
||||
ScheduleScrape(chunk, c, a)
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue