Files
scraper/scraper/scrape.go

302 lines
6.9 KiB
Go

package scraper
import (
"encoding/json"
"fmt"
"log"
"net/http"
"strconv"
"strings"
"time"
)
// Coords is a latitude/longitude pair. It is serialized to JSON under the
// keys "lat" and "lon".
type Coords struct {
	Lat float64 `json:"lat"`
	Lon float64 `json:"lon"`
}
// Vehicle is one vehicle reported by the upstream API for a line.
//
// Scrape fills GarageId from the upstream "garageNo" field, Coords from
// "lat"/"lng", AtStationId from "station_number" and AtStationName from
// "station_name".
type Vehicle struct {
	GarageId      string `json:"garageId"`
	Coords        Coords `json:"coords"`
	AtStationId   int    `json:"atStationId"`
	AtStationName string `json:"atStationName"`
}
// StationInfo describes one station on a line.
//
// Scrape builds it from an entry of the upstream "all_stations" list, using
// that entry's "id", "city_id", "city_name", "name" and "coordinates" fields.
type StationInfo struct {
	Id       int    `json:"id"`
	CityId   int    `json:"cityId"`
	CityName string `json:"cityName"`
	Name     string `json:"name"`
	Coords   Coords `json:"coords"`
}
// LineInfo describes the line an arrival belongs to.
//
// Scrape fills Id from the upstream "id" field, Name from "line_number",
// Title from "line_title", Stations from "all_stations" and Route from
// "line_route".
type LineInfo struct {
	Id       int           `json:"id"`
	Name     string        `json:"name"`
	Title    string        `json:"title"`
	Stations []StationInfo `json:"stations"`
	Route    []Coords      `json:"route"`
}
// ScrapeResult is one entry produced by Scrape for a station.
//
// Id is the station ID the entry was reported for. Scrape sets Success to
// true on every result it emits, and sets SecondsLeft to -1 when the upstream
// entry carries no numeric "seconds_left" value; in that case LineInfo and
// Vehicles are left at their zero values.
type ScrapeResult struct {
	Id          int       `json:"id"`
	Success     bool      `json:"success"`
	SecondsLeft int       `json:"secondsLeft"`
	LineInfo    LineInfo  `json:"lineInfo"`
	Vehicles    []Vehicle `json:"vehicles"`
}
// Station identifies a station to scrape. Its Id is what gets sent to the
// upstream API in the station list of the request.
type Station struct {
	Id int `json:"id"`
}
// Log is the logger this package writes its warnings, errors and progress
// messages to. It starts out as the standard library's default logger; since
// it is an exported variable, importers can replace it.
var Log = log.Default()
// apiParams is the fixed query string appended to the API endpoint by
// grabData. It ends in "station_uids=", after which grabData appends the
// semicolon-separated station IDs produced by scrapeRange.
const apiParams = "?ibfm=TM000001&action=get_all_trips&station_uids="
// scrapeRange renders the IDs of the given stations in decimal and joins them
// with ";" into a single string. The per-station conversion is delegated to
// the package's foreach helper.
func scrapeRange(s []Station) string {
	ids := foreach(s, func(st Station) string {
		return strconv.Itoa(st.Id)
	})
	return strings.Join(ids, ";")
}
// ApiConfig holds what is needed to reach the upstream API: Endpoint is the
// URL prefix the query string is appended to, and Key is the value sent in
// the X-Api-Authentication request header.
type ApiConfig struct {
	Endpoint string
	Key      string
}
// grabData performs one GET request against the upstream API for the given
// stations and returns the decoded JSON body.
//
// The request URL is c.Endpoint followed by apiParams and the station list
// produced by scrapeRange; c.Key is sent in the X-Api-Authentication header.
// A warning is logged when the round trip takes longer than five seconds,
// whether or not it succeeded.
//
// It returns an error if the request cannot be built or sent, if the response
// status is not 200, or if the body cannot be decoded into the expected
// map-of-lists-of-objects shape.
func grabData(stations []Station, c ApiConfig) (map[string][]map[string]interface{}, error) {
	req, err := http.NewRequest(http.MethodGet, c.Endpoint+apiParams+scrapeRange(stations), nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("X-Api-Authentication", c.Key)

	// NOTE(review): http.DefaultClient has no timeout, so a stalled upstream
	// blocks this call indefinitely. Consider a client with a Timeout.
	before := time.Now()
	resp, err := http.DefaultClient.Do(req)
	if elapsed := time.Since(before); elapsed > 5*time.Second {
		Log.Printf("WARN: Got response from upstream in %v seconds", elapsed.Seconds())
	}
	if err != nil {
		return nil, err
	}
	// The body has to be closed on every path, including the non-200 one,
	// otherwise the underlying connection is leaked.
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Report only the status line; formatting the whole response would
		// dump its internal fields into the error message.
		return nil, fmt.Errorf("Got non-200 code in response: %v", resp.Status)
	}

	var d map[string][]map[string]interface{}
	if err := json.NewDecoder(resp.Body).Decode(&d); err != nil {
		return nil, err
	}
	return d, nil
}
// scrapeLineRoute converts the elements of in into Coords via the package's
// foreach helper.
//
// Each element is expected to be a string of the form "<lon>,<lat>"; note
// that longitude comes first. Parsing is best-effort: an element that is not
// a string is treated as the empty string, scan errors are ignored, and any
// component that could not be read stays at zero.
func scrapeLineRoute(in []interface{}) []Coords {
	parsePoint := func(raw interface{}) Coords {
		var point Coords
		text, _ := raw.(string)
		// The scan result is deliberately discarded; see the doc comment.
		_, _ = fmt.Sscanf(text, "%f,%f", &point.Lon, &point.Lat)
		return point
	}
	return foreach(in, parsePoint)
}
// Scrape fetches the current data for stations from the upstream API
// described by c and converts it into ScrapeResult values.
//
// The upstream response maps a station ID (as a string) to a list of entries.
// Station IDs that are not integers are logged and skipped. Each entry is
// converted by scrapeArrival; entries it rejects are logged and skipped, so a
// single malformed entry never fails the whole scrape. Because the response
// is a map, the order of results across stations is not deterministic.
//
// An error is returned only when fetching or decoding the response fails; in
// that case the result slice is nil.
func Scrape(stations []Station, c ApiConfig) ([]ScrapeResult, error) {
	d, err := grabData(stations, c)
	if err != nil {
		return nil, err
	}

	var results []ScrapeResult
	for idString, stationResult := range d {
		id, err := strconv.Atoi(idString)
		if err != nil {
			Log.Printf("WARN: Failed to parse response for stationId=%v\n\t%v", idString, err)
			continue
		}
		for _, bus := range stationResult {
			if result, ok := scrapeArrival(id, bus); ok {
				results = append(results, result)
			}
		}
	}

	Log.Printf("Successfully scraped range %s", scrapeRange(stations))
	return results, nil
}

// scrapeArrival converts one upstream entry for station id into a
// ScrapeResult. The boolean is false when the entry must be skipped (the
// reason has already been logged).
func scrapeArrival(id int, bus map[string]interface{}) (ScrapeResult, bool) {
	// Field `success' only exists if it is false
	if _, failed := bus["success"]; failed {
		Log.Printf("WARN: Got unsuccessful response for stationId=%d. Perhaps it no longer exists?\n", id)
		return ScrapeResult{}, false
	}

	secondsLeft, ok := bus["seconds_left"].(float64)
	if !ok {
		// No bus is coming to this station
		return ScrapeResult{Success: true, Id: id, SecondsLeft: -1}, true
	}

	lineID, err := intField(bus, "id")
	if err != nil {
		Log.Printf("WARN: Failed to parse vehicle for stationId=%v\n\t%v", id, err)
		return ScrapeResult{}, false
	}

	result := ScrapeResult{
		Success:     true,
		Id:          id,
		SecondsLeft: int(secondsLeft),
	}
	result.LineInfo.Id = lineID
	// Name and title are optional: a failed assertion leaves them empty.
	result.LineInfo.Name, _ = bus["line_number"].(string)
	result.LineInfo.Title, _ = bus["line_title"].(string)

	vehiclesJSON, ok := bus["vehicles"].([]interface{})
	if !ok {
		// Without a vehicle list the route and station list are not read
		// either; the result carries only the line header and the ETA.
		return result, true
	}
	result.Vehicles = scrapeVehicles(id, vehiclesJSON)

	if route, ok := bus["line_route"].([]interface{}); ok {
		result.LineInfo.Route = scrapeLineRoute(route)
	}
	if allStations, ok := bus["all_stations"].([]interface{}); ok {
		result.LineInfo.Stations = scrapeStations(allStations)
	}
	return result, true
}

// scrapeVehicles converts the upstream "vehicles" list of an entry for
// station id. A vehicle with any missing or malformed field is logged and
// left out. The result is nil when no vehicle could be converted.
func scrapeVehicles(id int, vehiclesJSON []interface{}) []Vehicle {
	var vehicles []Vehicle
	for _, vehicleJSON := range vehiclesJSON {
		vehicle, ok := vehicleJSON.(map[string]interface{})
		if !ok {
			// Log the raw value: the asserted map is nil at this point.
			Log.Printf("WARN: Failed to parse vehicle for stationId=%v\n\t%v", id, vehicleJSON)
			continue
		}
		stationNumber, err := intField(vehicle, "station_number")
		if err != nil {
			Log.Printf("WARN: Failed to parse vehicle for stationId=%v\n\t%v", id, err)
			continue
		}
		lat, err := floatField(vehicle, "lat")
		if err != nil {
			Log.Printf("WARN: Failed to parse vehicle for stationId=%v\n\t%v", id, err)
			continue
		}
		lon, err := floatField(vehicle, "lng")
		if err != nil {
			Log.Printf("WARN: Failed to parse vehicle for stationId=%v\n\t%v", id, err)
			continue
		}
		garageID, ok := vehicle["garageNo"].(string)
		if !ok {
			Log.Printf("WARN: Failed to parse vehicle for stationId=%v\n\tVehicle's garageNo invalid or missing: %v", id, vehicle)
			continue
		}
		atStationName, ok := vehicle["station_name"].(string)
		if !ok {
			Log.Printf("WARN: Failed to parse vehicle for stationId=%v\n\tVehicle's station_name invalid or missing: %v", id, vehicle)
			continue
		}
		vehicles = append(vehicles, Vehicle{
			GarageId:      garageID,
			AtStationName: atStationName,
			AtStationId:   stationNumber,
			Coords:        Coords{Lat: lat, Lon: lon},
		})
	}
	return vehicles
}

// scrapeStations converts the upstream "all_stations" list of an entry. A
// station with a missing or malformed field is silently left out. The
// "coordinates" object is optional; when present, both components must parse
// or the station is left out as well.
func scrapeStations(allStations []interface{}) []StationInfo {
	var stations []StationInfo
	for _, raw := range allStations {
		station, ok := raw.(map[string]interface{})
		if !ok {
			continue
		}
		stationID, ok := station["id"].(float64)
		if !ok {
			continue
		}
		cityID, ok := station["city_id"].(float64)
		if !ok {
			continue
		}
		cityName, ok := station["city_name"].(string)
		if !ok {
			continue
		}
		stationName, ok := station["name"].(string)
		if !ok {
			continue
		}
		info := StationInfo{
			Id:       int(stationID),
			CityId:   int(cityID),
			CityName: cityName,
			Name:     stationName,
		}
		if coordinates, ok := station["coordinates"].(map[string]interface{}); ok {
			lat, err := floatField(coordinates, "latitude")
			if err != nil {
				continue
			}
			// NOTE(review): assumes the upstream key is "longitude"; the
			// previous code read "latitude" for both components. Confirm
			// against the API.
			lon, err := floatField(coordinates, "longitude")
			if err != nil {
				continue
			}
			info.Coords = Coords{Lat: lat, Lon: lon}
		}
		stations = append(stations, info)
	}
	return stations
}

// stringField returns m[key] as a string, or an error when the key is absent
// or holds a value of another type.
func stringField(m map[string]interface{}, key string) (string, error) {
	s, ok := m[key].(string)
	if !ok {
		return "", fmt.Errorf("field %q invalid or missing: %v", key, m[key])
	}
	return s, nil
}

// intField parses the string stored under m[key] as a decimal integer.
func intField(m map[string]interface{}, key string) (int, error) {
	s, err := stringField(m, key)
	if err != nil {
		return 0, err
	}
	return strconv.Atoi(s)
}

// floatField parses the string stored under m[key] as a 64-bit float.
func floatField(m map[string]interface{}, key string) (float64, error) {
	s, err := stringField(m, key)
	if err != nil {
		return 0, err
	}
	return strconv.ParseFloat(s, 64)
}
// ScheduleScrape scrapes chunk in an endless loop: it calls Scrape with a,
// sends the result on c, sleeps for three minutes and starts over.
//
// A failed scrape is logged and its nil result is still sent on c, so the
// receiver gets exactly one message per cycle. The send blocks until the
// value is accepted by c. The function never returns, so a caller that needs
// to keep running has to start it on its own goroutine.
func ScheduleScrape(chunk []Station, c chan []ScrapeResult, a ApiConfig) {
	// A loop rather than self-recursion: Go does not eliminate tail calls,
	// so recursing once per cycle would grow the stack without bound.
	for {
		r, err := Scrape(chunk, a)
		if err != nil {
			Log.Printf("ERROR: Scraping failed for stations %#v\n\t%v", chunk, err)
		}
		c <- r
		time.Sleep(time.Minute * 3)
	}
}