init: initial commit

This commit is contained in:
dvdrw 2023-09-28 14:27:31 +02:00
commit acac91dbfa
Signed by: dvdrw
GPG Key ID: 4756FA53D8797D7F
7 changed files with 455 additions and 0 deletions

5
go.mod Normal file
View File

@ -0,0 +1,5 @@
module git.dvdrw.dev/nsmarter/scraper
go 1.19
require github.com/mattn/go-sqlite3 v1.14.17

2
go.sum Normal file
View File

@ -0,0 +1,2 @@
github.com/mattn/go-sqlite3 v1.14.17 h1:mCRHCLDUBXgpKAqIKsaAaAsrAlbkeomtRFKXh2L6YIM=
github.com/mattn/go-sqlite3 v1.14.17/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg=

55
main/db.go Normal file
View File

@ -0,0 +1,55 @@
package main
import (
"database/sql"
"fmt"
"git.dvdrw.dev/nsmarter/scraper/scraper"
_ "github.com/mattn/go-sqlite3"
)
// readDb opens the SQLite database at stationDbPath and returns one
// scraper.Station per row of the `stations` table.
//
// Two package-level configuration flags narrow the query:
//   - innerCityOnly restricts results to cityId=72.
//   - limitQuerySize, when positive, caps the row count via LIMIT.
func readDb() ([]scraper.Station, error) {
	db, err := sql.Open("sqlite3", stationDbPath)
	if err != nil {
		return nil, err
	}
	defer db.Close()

	queryString := "SELECT stationId FROM stations"
	if innerCityOnly {
		queryString += " WHERE cityId=72 "
	}
	if limitQuerySize > 0 {
		queryString += fmt.Sprintf(" LIMIT %d ", limitQuerySize)
	}
	queryString += ";"

	rows, err := db.Query(queryString)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	// Pre-size generously; the full table is on the order of 1000 stations.
	stations := make([]scraper.Station, 0, 1000)
	for rows.Next() {
		station := scraper.Station{}
		if err := rows.Scan(&station.Id); err != nil {
			return nil, err
		}
		stations = append(stations, station)
		Log.Printf("Imported station with ID=%v\n", station.Id)
	}
	// rows.Err reports any error that ended the iteration early.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return stations, nil
}

BIN
main/main Executable file

Binary file not shown.

92
main/main.go Normal file
View File

@ -0,0 +1,92 @@
package main
import (
"fmt"
"git.dvdrw.dev/nsmarter/scraper/scraper"
"log"
"os"
"strconv"
"strings"
)
// Log is the process-wide logger for the scraper binary.
var Log = log.Default()

// Fallback values applied when the corresponding envar is missing or invalid.
const defaultChunkSize = 5
const defaultLimitQuerySize = 0

// Runtime configuration, populated from environment variables by parseEnvars.
var stationDbPath string       // STATIONS_DB_PATH (required; fatal if empty)
var innerCityOnly bool = false // INNER_CITY_ONLY == "1" restricts the DB query to cityId=72
var chunkSize = 5              // CHUNK_SIZE: stations per scrape goroutine
var apiEndpoint string = "https://online.nsmart.rs/publicapi/v1/announcement/announcement.php"
var apiKey string // API_KEY: sent as X-Api-Authentication by the scraper package
var limitQuerySize = 0 // LIMIT_QUERY_SIZE: 0 means unlimited
// parseEnvars walks the process environment and fills in the package-level
// configuration variables. Unrecognized variables are ignored; numeric
// variables that fail to parse fall back to their defaults with a warning.
func parseEnvars() {
	for _, entry := range os.Environ() {
		key, value, _ := strings.Cut(entry, "=")
		switch key {
		case "STATIONS_DB_PATH":
			stationDbPath = value
		case "INNER_CITY_ONLY":
			innerCityOnly = value == "1"
		case "CHUNK_SIZE":
			parsed, err := strconv.Atoi(value)
			if err != nil {
				Log.Printf("WARN: Invalid value for CHUNK_SIZE. Falling back to default value (%v)\n", defaultChunkSize)
				parsed = defaultChunkSize
			}
			chunkSize = parsed
		case "LIMIT_QUERY_SIZE":
			parsed, err := strconv.Atoi(value)
			if err != nil {
				Log.Printf("WARN: Invalid value for LIMIT_QUERY_SIZE. Falling back to default value (%v)\n", defaultLimitQuerySize)
				parsed = defaultLimitQuerySize
			}
			limitQuerySize = parsed
		case "API_ENDPOINT":
			apiEndpoint = value
		case "API_KEY":
			apiKey = value
		}
	}
}
// main loads configuration, imports the station list from SQLite, splits it
// into chunks, starts one scrape goroutine per chunk, and prints every
// result batch that arrives.
func main() {
	// Parse out config from environment variables
	parseEnvars()

	if stationDbPath == "" {
		log.Fatal("Environment variable STATIONS_DB_PATH empty!")
	}

	stations, err := readDb()
	if err != nil {
		log.Fatal(err)
	}
	Log.Printf("Finished importing stations! (Total: %v)", len(stations))

	// Split stations so as to be scraped in chunks
	var stationChunks [][]scraper.Station
	for start := 0; start < len(stations); start += chunkSize {
		end := start + chunkSize
		if len(stations) < end {
			end = len(stations)
		}
		stationChunks = append(stationChunks, stations[start:end])
	}

	results := make(chan []scraper.ScrapeResult, 200)
	cfg := scraper.ApiConfig{Endpoint: apiEndpoint, Key: apiKey}
	for _, chunk := range stationChunks {
		go scraper.ScheduleScrape(chunk, results, cfg)
	}

	// The scrape goroutines never close the channel, so this loops forever.
	for batch := range results {
		fmt.Printf("Received data: %#v\n", batch)
	}
}

292
scraper/scrape.go Normal file
View File

@ -0,0 +1,292 @@
package scraper
import (
"encoding/json"
"fmt"
"net/http"
"strconv"
"strings"
"time"
"log"
)
// Coords is a geographic latitude/longitude pair.
type Coords struct {
	Lat, Lon float64
}

// Vehicle is one bus reported for a line. Fields mirror the upstream JSON
// keys garageNo, lat/lng, station_number and station_name (see Scrape for
// the mapping).
type Vehicle struct {
	GarageId      string
	Coords        Coords
	AtStationId   int
	AtStationName string
}

// StationInfo describes one stop on a line, as listed in the response's
// "all_stations" array.
type StationInfo struct {
	Id       int
	CityId   int
	CityName string
	Name     string
	Coords   Coords
}

// LineInfo aggregates the per-line data extracted from one response entry:
// line name/title, its stops, and its route polyline.
type LineInfo struct {
	Name     string
	Title    string
	Stations []StationInfo
	Route    []Coords
}

// ScrapeResult is the flattened outcome for a single station.
// SecondsLeft is -1 when the response carried no arrival time (see Scrape).
type ScrapeResult struct {
	Id          int
	Success     bool
	SecondsLeft int
	LineInfo    LineInfo
	Vehicles    []Vehicle
}

// Station identifies a station to scrape by its numeric ID.
type Station struct {
	Id int
}

// Log is the package-wide logger.
var Log = log.Default()

// apiParams is the fixed query-string prefix appended to the endpoint; the
// semicolon-separated IDs from scrapeRange follow the station_uids key.
const apiParams = "?ibfm=TM000001&action=get_all_trips&station_uids="
// scrapeRange renders a station list as the semicolon-separated ID string
// expected by the API's station_uids query parameter, e.g. "12;34;56".
func scrapeRange(s []Station) string {
	var b strings.Builder
	for i, station := range s {
		if i > 0 {
			b.WriteByte(';')
		}
		b.WriteString(strconv.Itoa(station.Id))
	}
	return b.String()
}
// ApiConfig holds the upstream API's base URL and the key sent in the
// X-Api-Authentication request header.
type ApiConfig struct {
	Endpoint, Key string
}
// grabData performs one authenticated GET against the upstream API for the
// given stations and decodes the JSON response.
//
// The response maps station IDs (as strings) to lists of per-line objects;
// the schema is only known from how Scrape consumes it. Responses that take
// longer than 5 seconds are logged as a warning.
func grabData(stations []Station, c ApiConfig) (map[string][]map[string]interface{}, error) {
	req, err := http.NewRequest(http.MethodGet, c.Endpoint+apiParams+scrapeRange(stations), nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("X-Api-Authentication", c.Key)

	before := time.Now()
	resp, err := http.DefaultClient.Do(req)
	elapsed := time.Since(before)
	if elapsed > time.Second*5 {
		Log.Printf("WARN: Got response from upstream in %v seconds", elapsed.Seconds())
	}
	if err != nil {
		return nil, err
	}
	// Always close the body; the original leaked it, which prevents the
	// transport from reusing the connection.
	defer resp.Body.Close()
	if resp.StatusCode != 200 {
		return nil, fmt.Errorf("Got non-200 code in response: %v", resp)
	}

	var d map[string][]map[string]interface{}
	if err := json.NewDecoder(resp.Body).Decode(&d); err != nil {
		return nil, err
	}
	return d, nil
}
// scrapeLineRoute converts the API's "line_route" entries — strings that
// Sscanf reads as "<lon>,<lat>" (order inferred from the scan targets) —
// into Coords. Non-string or unparseable entries yield zero-valued Coords.
func scrapeLineRoute(in []interface{}) []Coords {
	out := make([]Coords, len(in))
	for i, raw := range in {
		text, _ := raw.(string)
		fmt.Sscanf(text, "%f,%f", &out[i].Lon, &out[i].Lat)
	}
	return out
}
// Scrape fetches live arrival data for the given stations and flattens the
// upstream JSON into ScrapeResult values.
//
// Per-entry parse problems are logged and skipped so one malformed record
// cannot abort the whole scrape; only a transport/decode failure returns an
// error. Fixes over the original: the station-name check tested a stale
// `err` instead of `ok`; longitude re-read the "latitude" key; and several
// unchecked type assertions could panic on unexpected response shapes.
func Scrape(stations []Station, c ApiConfig) ([]ScrapeResult, error) {
	d, err := grabData(stations, c)
	if err != nil {
		return nil, err
	}
	var results []ScrapeResult
	// The response maps station IDs (decimal strings) to per-line objects.
	for idString, stationResult := range d {
		id, err := strconv.Atoi(idString)
		if err != nil {
			Log.Printf("WARN: Failed to parse response for stationId=%v\n\t%v", idString, err)
			continue
		}
		for _, bus := range stationResult {
			_, ok := bus["success"]
			// Field `success' only exists if it is false
			if ok {
				Log.Printf("WARN: Got unsuccessful response for stationId=%d. Perhaps it no longer exists?\n", id)
				continue
			}
			secondsLeft, ok := bus["seconds_left"].(float64)
			if !ok {
				// No bus is coming to this station
				results = append(results, ScrapeResult{
					Success:     true,
					Id:          id,
					SecondsLeft: -1,
				})
				continue
			}
			var lineInfo LineInfo
			lineName, ok := bus["line_number"].(string)
			if !ok {
				lineName = ""
			}
			lineInfo.Name = lineName
			lineTitle, ok := bus["line_title"].(string)
			if ok {
				lineInfo.Title = lineTitle
			}
			var vehicles []Vehicle
			vehiclesJson, ok := bus["vehicles"].([]interface{})
			if !ok {
				goto SKIP_AUX_INFO
			}
			{
				for _, vehicleJson := range vehiclesJson {
					vehicle, ok := vehicleJson.(map[string]interface{})
					if !ok {
						Log.Printf("WARN: Failed to parse vehicle for stationId=%v\n\t%v", id, vehicle)
						continue
					}
					// Numeric vehicle fields arrive as strings; check each
					// assertion so a malformed field logs a warning instead
					// of panicking the goroutine.
					stationNumberText, ok := vehicle["station_number"].(string)
					if !ok {
						Log.Printf("WARN: Failed to parse vehicle for stationId=%v\n\t%v", id, vehicle)
						continue
					}
					stationNumber, err := strconv.Atoi(stationNumberText)
					if err != nil {
						Log.Printf("WARN: Failed to parse vehicle for stationId=%v\n\t%v", id, err)
						continue
					}
					latText, ok := vehicle["lat"].(string)
					if !ok {
						Log.Printf("WARN: Failed to parse vehicle for stationId=%v\n\t%v", id, vehicle)
						continue
					}
					lat, err := strconv.ParseFloat(latText, 64)
					if err != nil {
						Log.Printf("WARN: Failed to parse vehicle for stationId=%v\n\t%v", id, err)
						continue
					}
					lngText, ok := vehicle["lng"].(string)
					if !ok {
						Log.Printf("WARN: Failed to parse vehicle for stationId=%v\n\t%v", id, vehicle)
						continue
					}
					lon, err := strconv.ParseFloat(lngText, 64)
					if err != nil {
						Log.Printf("WARN: Failed to parse vehicle for stationId=%v\n\t%v", id, err)
						continue
					}
					garageId, ok := vehicle["garageNo"].(string)
					if !ok {
						Log.Printf("WARN: Failed to parse vehicle for stationId=%v\n\tVehicle's garageNo invalid or missing: %v", id, vehicle)
						continue
					}
					atStationName, ok := vehicle["station_name"].(string)
					if !ok {
						Log.Printf("WARN: Failed to parse vehicle for stationId=%v\n\tVehicle's station_name invalid or missing: %v", id, vehicle)
						continue
					}
					vehicles = append(vehicles, Vehicle{
						GarageId:      garageId,
						AtStationName: atStationName,
						AtStationId:   stationNumber,
						Coords:        Coords{lat, lon},
					})
				}
				// BUG FIX: the assertion was unchecked; a missing or oddly
				// shaped line_route would have panicked the whole scraper.
				if route, ok := bus["line_route"].([]interface{}); ok {
					lineInfo.Route = scrapeLineRoute(route)
				}
				all_stations, ok := bus["all_stations"].([]interface{})
				if !ok {
					goto SKIP_AUX_INFO
				}
				{
					for _, rawStation := range all_stations {
						station, ok := rawStation.(map[string]interface{})
						if !ok {
							continue
						}
						stationId, ok := station["id"].(float64)
						if !ok {
							continue
						}
						cityId, ok := station["city_id"].(float64)
						if !ok {
							continue
						}
						cityName, ok := station["city_name"].(string)
						if !ok {
							continue
						}
						stationName, ok := station["name"].(string)
						// BUG FIX: this previously tested the stale outer
						// `err` instead of `ok`, so a non-string name slipped
						// through as "".
						if !ok {
							continue
						}
						stationInfo := StationInfo{
							Id:       int(stationId),
							CityId:   int(cityId),
							CityName: cityName,
							Name:     stationName,
						}
						coordinates, ok := station["coordinates"].(map[string]interface{})
						if ok {
							// Preserves the original's `break` on a bad
							// coordinate: it abandons the rest of the list.
							latText, ok := coordinates["latitude"].(string)
							if !ok {
								break
							}
							lat, err := strconv.ParseFloat(latText, 64)
							if err != nil {
								break
							}
							// BUG FIX: longitude previously re-read the
							// "latitude" field.
							lonText, ok := coordinates["longitude"].(string)
							if !ok {
								break
							}
							lon, err := strconv.ParseFloat(lonText, 64)
							if err != nil {
								break
							}
							stationInfo.Coords = Coords{lat, lon}
						}
						lineInfo.Stations = append(lineInfo.Stations, stationInfo)
					}
				}
			}
		SKIP_AUX_INFO:
			results = append(results, ScrapeResult{
				Success:     true,
				Id:          id,
				LineInfo:    lineInfo,
				SecondsLeft: int(secondsLeft),
				Vehicles:    vehicles,
			})
		}
	}
	Log.Printf("Successfully scraped range %s", scrapeRange(stations))
	return results, nil
}
// ScheduleScrape scrapes the given chunk forever, sending each round's
// results to c and sleeping three minutes between rounds. It never returns;
// run it in its own goroutine.
//
// BUG FIX: the original recursed on itself after every round. Go performs
// no tail-call elimination, so the stack grew without bound; a loop has
// identical behavior. On a failed round a nil slice is still sent, matching
// the original (the consumer just sees an empty batch).
func ScheduleScrape(chunk []Station, c chan []ScrapeResult, a ApiConfig) {
	for {
		r, err := Scrape(chunk, a)
		if err != nil {
			Log.Printf("ERROR: Scraping failed for stations %#v\n\t%v", chunk, err)
		}
		c <- r
		time.Sleep(time.Minute * 3)
	}
}

9
scraper/util.go Normal file
View File

@ -0,0 +1,9 @@
package scraper
// foreach maps f over ts, returning a new slice holding f applied to each
// element in order (a generic "map" helper).
func foreach[T any, V any](ts []T, f func(T) V) []V {
	out := make([]V, 0, len(ts))
	for _, t := range ts {
		out = append(out, f(t))
	}
	return out
}