feat: make scraping much more robust

Sometimes the upstream API drops most of the data it normally includes. This
commit assumes less of that data will be present, and makes the most of whatever
data is returned.

For example, line IDs from upstream are no longer used at all; line names and
titles are used instead to identify lines and their directions.
This commit is contained in:
dvdrw 2023-10-04 15:56:32 +02:00
parent 738d43adea
commit 66ad4e1666
Signed by: dvdrw
GPG Key ID: 4756FA53D8797D7F
1 changed files with 55 additions and 23 deletions

View File

@ -31,11 +31,11 @@ type StationInfo struct {
} }
type LineInfo struct { type LineInfo struct {
Id int `json:"id"` Name string `json:"name"`
Name string `json:"name"` Title string `json:"title"`
Title string `json:"title"` Direction int `json:"direction"`
Stations []StationInfo `json:"stations"` Stations []StationInfo `json:"stations"`
Route []Coords `json:"route"` Route []Coords `json:"route"`
} }
type ScrapeResult struct { type ScrapeResult struct {
@ -142,25 +142,40 @@ func Scrape(stations []Station, c ApiConfig) ([]ScrapeResult, error) {
var lineInfo LineInfo var lineInfo LineInfo
lineId, err := strconv.Atoi(bus["id"].(string))
if err != nil {
Log.Printf("WARN: Failed to parse vehicle for stationId=%v\n\t%v", id, err)
continue
}
lineInfo.Id = lineId
lineName, ok := bus["line_number"].(string) lineName, ok := bus["line_number"].(string)
if !ok { if !ok {
lineName = "" lineName = ""
} }
lineInfo.Name = lineName lineInfo.Name = strings.TrimSpace(lineName)
lineTitle, ok := bus["line_title"].(string) lineTitle, ok := bus["line_title"].(string)
if ok { if ok {
lineInfo.Title = lineTitle lineInfo.Title = lineTitle
} }
mainLineTitle, ok := bus["main_line_title"].(string)
if ok {
if mainLineTitle == lineTitle {
lineInfo.Direction = 1
} else {
lineInfo.Direction = 0
}
} else {
lineInfo.Direction = -1
}
var vehicles []Vehicle var vehicles []Vehicle
garageNoStr, ok := bus["garage_no"].(string)
if ok {
vehicles = append(vehicles, Vehicle{
GarageId: garageNoStr,
Coords: Coords{},
AtStationId: -1,
AtStationName: "",
})
}
vehiclesJson, ok := bus["vehicles"].([]interface{}) vehiclesJson, ok := bus["vehicles"].([]interface{})
if !ok { if !ok {
@ -175,34 +190,38 @@ func Scrape(stations []Station, c ApiConfig) ([]ScrapeResult, error) {
continue continue
} }
stationNumber, err := strconv.Atoi(vehicle["station_number"].(string)) stationNumberString, ok := vehicle["station_number"].(string)
if err != nil { var stationNumber int = -1
Log.Printf("WARN: Failed to parse vehicle for stationId=%v\n\t%v", id, err) if ok {
continue stationNumber, err = strconv.Atoi(stationNumberString)
if err != nil {
Log.Printf("WARN: No station number for vehicle stationId=%v\n\t%v", id, err)
continue
}
} }
lat, err := strconv.ParseFloat(vehicle["lat"].(string), 64) lat, err := strconv.ParseFloat(vehicle["lat"].(string), 64)
if err != nil { if err != nil {
Log.Printf("WARN: Failed to parse vehicle for stationId=%v\n\t%v", id, err) Log.Printf("WARN: Failed to parse vehicle lat for stationId=%v\n\t%v", id, err)
continue continue
} }
lon, err := strconv.ParseFloat(vehicle["lng"].(string), 64) lon, err := strconv.ParseFloat(vehicle["lng"].(string), 64)
if err != nil { if err != nil {
Log.Printf("WARN: Failed to parse vehicle for stationId=%v\n\t%v", id, err) Log.Printf("WARN: Failed to parse vehicle lng for stationId=%v\n\t%v", id, err)
continue continue
} }
garageId, ok := vehicle["garageNo"].(string) garageId, ok := vehicle["garageNo"].(string)
if !ok { if !ok {
Log.Printf("WARN: Failed to parse vehicle for stationId=%v\n\tVehicle's garageNo invalid or missing: %v", id, vehicle) Log.Printf("WARN: Failed to parse vehicle garageNo for stationId=%v\n\tVehicle's garageNo invalid or missing: %v", id, vehicle)
continue continue
} }
atStationName, ok := vehicle["station_name"].(string) atStationName, ok := vehicle["station_name"].(string)
if !ok { if !ok {
Log.Printf("WARN: Failed to parse vehicle for stationId=%v\n\tVehicle's station_name invalid or missing: %v", id, vehicle) Log.Printf("WARN: Failed to parse station_name for vehicle at stationId=%v\n\tVehicle's station_name invalid or missing: %v", id, vehicle)
continue atStationName = ""
} }
vehicles = append(vehicles, Vehicle{ vehicles = append(vehicles, Vehicle{
@ -213,7 +232,12 @@ func Scrape(stations []Station, c ApiConfig) ([]ScrapeResult, error) {
}) })
} }
lineInfo.Route = scrapeLineRoute(bus["line_route"].([]interface{})) lineRouteJson, ok := bus["line_route"].([]interface{})
if ok {
lineInfo.Route = scrapeLineRoute(lineRouteJson)
} else {
lineInfo.Route = []Coords{}
}
all_stations, ok := bus["all_stations"].([]interface{}) all_stations, ok := bus["all_stations"].([]interface{})
if !ok { if !ok {
@ -275,6 +299,14 @@ func Scrape(stations []Station, c ApiConfig) ([]ScrapeResult, error) {
} }
SKIP_AUX_INFO: SKIP_AUX_INFO:
// Sometimes we don't get the vehicles array in the upstream response.
// In that case, we grab the only info about the bus we can get
// (its garageNo from the toplevel garage_no field).
// In case we parse *both* buses, pick the one last added.
if len(vehicles) > 1 {
vehicles = []Vehicle{vehicles[len(vehicles)-1]}
}
results = append(results, ScrapeResult{ results = append(results, ScrapeResult{
Success: true, Success: true,
Id: id, Id: id,