From 66ad4e166656bbe710edd2c54fb333020e25b378 Mon Sep 17 00:00:00 2001 From: dvdrw Date: Wed, 4 Oct 2023 15:56:32 +0200 Subject: [PATCH] feat: make scraping much more robust Sometimes the upstream API drops most of the data it normally includes. This commit assumes less of that data will be present, and makes the most of whatever data is returned. For example, line IDs from upstream are no longer used at all; line names and titles are used instead to determine lines and their directions. --- scraper/scrape.go | 78 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 55 insertions(+), 23 deletions(-) diff --git a/scraper/scrape.go b/scraper/scrape.go index c5dccc1..85dc9a0 100644 --- a/scraper/scrape.go +++ b/scraper/scrape.go @@ -31,11 +31,11 @@ type StationInfo struct { } type LineInfo struct { - Id int `json:"id"` - Name string `json:"name"` - Title string `json:"title"` - Stations []StationInfo `json:"stations"` - Route []Coords `json:"route"` + Name string `json:"name"` + Title string `json:"title"` + Direction int `json:"direction"` + Stations []StationInfo `json:"stations"` + Route []Coords `json:"route"` } type ScrapeResult struct { @@ -142,25 +142,40 @@ func Scrape(stations []Station, c ApiConfig) ([]ScrapeResult, error) { var lineInfo LineInfo - lineId, err := strconv.Atoi(bus["id"].(string)) - if err != nil { - Log.Printf("WARN: Failed to parse vehicle for stationId=%v\n\t%v", id, err) - continue - } - lineInfo.Id = lineId - lineName, ok := bus["line_number"].(string) if !ok { lineName = "" } - lineInfo.Name = lineName + lineInfo.Name = strings.TrimSpace(lineName) lineTitle, ok := bus["line_title"].(string) if ok { lineInfo.Title = lineTitle } + mainLineTitle, ok := bus["main_line_title"].(string) + if ok { + if mainLineTitle == lineTitle { + lineInfo.Direction = 1 + } else { + lineInfo.Direction = 0 + } + } else { + lineInfo.Direction = -1 + } + var vehicles []Vehicle + + garageNoStr, ok := bus["garage_no"].(string) + if ok { + vehicles = append(vehicles, 
Vehicle{ + GarageId: garageNoStr, + Coords: Coords{}, + AtStationId: -1, + AtStationName: "", + }) + } + vehiclesJson, ok := bus["vehicles"].([]interface{}) if !ok { @@ -175,34 +190,38 @@ func Scrape(stations []Station, c ApiConfig) ([]ScrapeResult, error) { continue } - stationNumber, err := strconv.Atoi(vehicle["station_number"].(string)) - if err != nil { - Log.Printf("WARN: Failed to parse vehicle for stationId=%v\n\t%v", id, err) - continue + stationNumberString, ok := vehicle["station_number"].(string) + var stationNumber int = -1 + if ok { + stationNumber, err = strconv.Atoi(stationNumberString) + if err != nil { + Log.Printf("WARN: No station number for vehicle stationId=%v\n\t%v", id, err) + continue + } } lat, err := strconv.ParseFloat(vehicle["lat"].(string), 64) if err != nil { - Log.Printf("WARN: Failed to parse vehicle for stationId=%v\n\t%v", id, err) + Log.Printf("WARN: Failed to parse vehicle lat for stationId=%v\n\t%v", id, err) continue } lon, err := strconv.ParseFloat(vehicle["lng"].(string), 64) if err != nil { - Log.Printf("WARN: Failed to parse vehicle for stationId=%v\n\t%v", id, err) + Log.Printf("WARN: Failed to parse vehicle lng for stationId=%v\n\t%v", id, err) continue } garageId, ok := vehicle["garageNo"].(string) if !ok { - Log.Printf("WARN: Failed to parse vehicle for stationId=%v\n\tVehicle's garageNo invalid or missing: %v", id, vehicle) + Log.Printf("WARN: Failed to parse vehicle garageNo for stationId=%v\n\tVehicle's garageNo invalid or missing: %v", id, vehicle) continue } atStationName, ok := vehicle["station_name"].(string) if !ok { - Log.Printf("WARN: Failed to parse vehicle for stationId=%v\n\tVehicle's station_name invalid or missing: %v", id, vehicle) - continue + Log.Printf("WARN: Failed to parse station_name for vehicle at stationId=%v\n\tVehicle's station_name invalid or missing: %v", id, vehicle) + atStationName = "" } vehicles = append(vehicles, Vehicle{ @@ -213,7 +232,12 @@ func Scrape(stations []Station, c 
ApiConfig) ([]ScrapeResult, error) { }) } - lineInfo.Route = scrapeLineRoute(bus["line_route"].([]interface{})) + lineRouteJson, ok := bus["line_route"].([]interface{}) + if ok { + lineInfo.Route = scrapeLineRoute(lineRouteJson) + } else { + lineInfo.Route = []Coords{} + } all_stations, ok := bus["all_stations"].([]interface{}) if !ok { @@ -275,6 +299,14 @@ func Scrape(stations []Station, c ApiConfig) ([]ScrapeResult, error) { } SKIP_AUX_INFO: + // Sometimes we don't get the vehicles array in the upstream response. + // In that case, we grab the only info about the bus we can get + // (its garageNo from the toplevel garage_no field). + // In case we parse *both* buses, pick the one last added. + if len(vehicles) > 1 { + vehicles = []Vehicle{vehicles[len(vehicles)-1]} + } + results = append(results, ScrapeResult{ Success: true, Id: id,