Add itinerary scraping
This commit is contained in:
parent
1d9db5b491
commit
b66b8f79b1
7 changed files with 118 additions and 1 deletions
|
@ -187,6 +187,8 @@ public static class RouteScraper {
|
|||
foreach (var div in leftSideDivs[2]
|
||||
.QuerySelectorAll(":scope > div")
|
||||
.Where((_, i) => i % 2 != 0)) {
|
||||
var text = div.Text().WithCollapsedSpaces();
|
||||
if (text == "Nu sunt stații intermediare.") continue;
|
||||
train.AddIntermediateStop(div.Text().WithCollapsedSpaces());
|
||||
}
|
||||
|
||||
|
|
40
server/Controllers/V3/ItinerariesController.cs
Normal file
40
server/Controllers/V3/ItinerariesController.cs
Normal file
|
@ -0,0 +1,40 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.AspNetCore.Http;
|
||||
using Microsoft.AspNetCore.Mvc;
|
||||
using scraper.Models.Itinerary;
|
||||
using Server.Services.Interfaces;
|
||||
|
||||
namespace Server.Controllers.V3;
|
||||
|
||||
/// <summary>
/// V3 API controller (route template <c>/v3/[controller]</c>) that exposes
/// itinerary lookups between two stations.
/// </summary>
[ApiController]
[ApiExplorerSettings(GroupName = "v3")]
[Route("/v3/[controller]")]
public class ItinerariesController : Controller {
    /// <summary>Service used to fetch itineraries.</summary>
    private IDataManager DataManager { get; }

    /// <summary>Injected database service; not used by the action in this class.</summary>
    private IDatabase Database { get; }

    /// <summary>Creates the controller from its injected services.</summary>
    /// <param name="dataManager">Service used to fetch itineraries.</param>
    /// <param name="database">Database service stored on the controller.</param>
    public ItinerariesController(IDataManager dataManager, IDatabase database) {
        (DataManager, Database) = (dataManager, database);
    }

    /// <summary>
    /// Looks up itineraries between <paramref name="from"/> and <paramref name="to"/>.
    /// </summary>
    /// <param name="from">Departure station, taken from the query string.</param>
    /// <param name="to">Arrival station, taken from the query string.</param>
    /// <param name="date">Optional date, taken from the query string and passed through unchanged.</param>
    /// <returns>
    /// 200 with the itineraries when the data manager returns a list,
    /// 404 when it returns <c>null</c>.
    /// </returns>
    [HttpGet("")]
    [ProducesResponseType(typeof(IEnumerable<IItinerary>), StatusCodes.Status200OK)]
    [ProducesResponseType(StatusCodes.Status404NotFound)]
    public async Task<ActionResult<IEnumerable<IItinerary>>> FindItineraries(
        [FromQuery] string from,
        [FromQuery] string to,
        [FromQuery] DateTimeOffset? date
    ) {
        var found = await DataManager.FetchItineraries(from, to, date);

        if (found is not null) {
            return Ok(found);
        }

        return NotFound();
    }
}
|
|
@ -8,6 +8,7 @@ using Server.Services.Interfaces;
|
|||
using Server.Utils;
|
||||
using InfoferScraper;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using scraper.Models.Itinerary;
|
||||
|
||||
namespace Server.Services.Implementations {
|
||||
public class DataManager : IDataManager {
|
||||
|
@ -52,10 +53,27 @@ namespace Server.Services.Implementations {
|
|||
}
|
||||
return train;
|
||||
}, TimeSpan.FromSeconds(30));
|
||||
itinerariesCache = new(async (t) => {
|
||||
var (from, to, date) = t;
|
||||
var zonedDate = new NodaTime.LocalDate(date.Year, date.Month, date.Day).AtStartOfDayInZone(CfrTimeZone);
|
||||
|
||||
var itineraries = await InfoferScraper.Scrapers.RouteScraper.Scrape(from, to, zonedDate.ToDateTimeOffset());
|
||||
if (itineraries != null) {
|
||||
_ = Task.Run(async () => {
|
||||
var watch = Stopwatch.StartNew();
|
||||
await Database.OnItineraries(itineraries);
|
||||
var ms = watch.ElapsedMilliseconds;
|
||||
Logger.LogInformation("OnItineraries timing: {StationDataMs} ms", ms);
|
||||
});
|
||||
}
|
||||
|
||||
return itineraries;
|
||||
}, TimeSpan.FromMinutes(1));
|
||||
}
|
||||
|
||||
private readonly AsyncCache<(string, DateOnly), IStationScrapeResult?> stationCache;
|
||||
private readonly AsyncCache<(string, DateOnly), ITrainScrapeResult?> trainCache;
|
||||
private readonly AsyncCache<(string, string, DateOnly), IReadOnlyList<IItinerary>?> itinerariesCache;
|
||||
|
||||
public Task<IStationScrapeResult?> FetchStation(string stationName, DateTimeOffset date) {
|
||||
var cfrDateTime = new NodaTime.ZonedDateTime(NodaTime.Instant.FromDateTimeOffset(date), CfrTimeZone);
|
||||
|
@ -70,5 +88,12 @@ namespace Server.Services.Implementations {
|
|||
|
||||
return trainCache.GetItem((trainNumber, cfrDate));
|
||||
}
|
||||
|
||||
/// <summary>
/// Returns the itineraries between two stations through <c>itinerariesCache</c>.
/// </summary>
/// <param name="from">Departure station name; part of the cache key.</param>
/// <param name="to">Arrival station name; part of the cache key.</param>
/// <param name="date">
/// Moment whose calendar day (as seen in <c>CfrTimeZone</c>) is looked up;
/// <see cref="DateTimeOffset.Now"/> is used when omitted.
/// </param>
/// <returns>The cached or freshly fetched itineraries; may be <c>null</c>.</returns>
public async Task<IReadOnlyList<IItinerary>?> FetchItineraries(string from, string to, DateTimeOffset? date = null) {
    var requestedMoment = date ?? DateTimeOffset.Now;
    var instant = NodaTime.Instant.FromDateTimeOffset(requestedMoment);

    // Only the calendar day in the CFR time zone takes part in the cache key.
    var inCfrZone = new NodaTime.ZonedDateTime(instant, CfrTimeZone);
    var day = new DateOnly(inCfrZone.Year, inCfrZone.Month, inCfrZone.Day);

    return await itinerariesCache.GetItem((from, to, day));
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -13,6 +13,7 @@ using Microsoft.Extensions.Options;
|
|||
using MongoDB.Bson;
|
||||
using MongoDB.Bson.Serialization.Attributes;
|
||||
using MongoDB.Driver;
|
||||
using scraper.Models.Itinerary;
|
||||
using Server.Models.Database;
|
||||
using Server.Utils;
|
||||
|
||||
|
@ -339,6 +340,17 @@ public class Database : Server.Services.Interfaces.IDatabase {
|
|||
await ProcessTrain(train);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// For every train of every itinerary, reports the train number together with
/// all of its stations to <c>FoundTrainAtStations</c>.
/// </summary>
/// <param name="itineraries">Itineraries whose trains are processed in order.</param>
public async Task OnItineraries(IReadOnlyList<IItinerary> itineraries) {
    // Flattened lazily, so trains are still visited itinerary by itinerary.
    foreach (var leg in itineraries.SelectMany(itinerary => itinerary.Trains)) {
        // Intermediate stops first, then the endpoints (origin, destination).
        var stations = leg.IntermediateStops
            .Append(leg.From)
            .Append(leg.To);

        await FoundTrainAtStations(stations, leg.TrainNumber);
    }
}
|
||||
}
|
||||
|
||||
public record DbRecord(
|
||||
|
|
|
@ -1,11 +1,14 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Threading.Tasks;
|
||||
using InfoferScraper.Models.Train;
|
||||
using InfoferScraper.Models.Station;
|
||||
using scraper.Models.Itinerary;
|
||||
|
||||
namespace Server.Services.Interfaces;
|
||||
|
||||
/// <summary>
/// Fetches station, train and itinerary data.
/// Every operation is asynchronous and declares a nullable result.
/// </summary>
public interface IDataManager {
    /// <summary>Fetches the data for a station at the given date.</summary>
    /// <param name="stationName">Name of the station.</param>
    /// <param name="date">Date for which the station is requested.</param>
    /// <returns>The station result; declared nullable (presumably <c>null</c> when nothing was found — confirm against the implementation).</returns>
    Task<IStationScrapeResult?> FetchStation(string stationName, DateTimeOffset date);

    /// <summary>Fetches the data for a train at the given date.</summary>
    /// <param name="trainNumber">Number identifying the train.</param>
    /// <param name="date">Date for which the train is requested.</param>
    /// <returns>The train result; declared nullable (presumably <c>null</c> when nothing was found — confirm against the implementation).</returns>
    Task<ITrainScrapeResult?> FetchTrain(string trainNumber, DateTimeOffset date);

    /// <summary>Fetches the itineraries between two stations.</summary>
    /// <param name="from">Departure station name.</param>
    /// <param name="to">Arrival station name.</param>
    /// <param name="date">Optional date; defaults to <c>null</c>, leaving the choice of date to the implementation.</param>
    /// <returns>The list of itineraries; declared nullable.</returns>
    Task<IReadOnlyList<IItinerary>?> FetchItineraries(string from, string to, DateTimeOffset? date = null);
}
|
||||
|
|
|
@ -2,6 +2,7 @@ using System.Collections.Generic;
|
|||
using System.Threading.Tasks;
|
||||
using InfoferScraper.Models.Train;
|
||||
using InfoferScraper.Models.Station;
|
||||
using scraper.Models.Itinerary;
|
||||
using Server.Models.Database;
|
||||
|
||||
namespace Server.Services.Interfaces;
|
||||
|
@ -15,4 +16,5 @@ public interface IDatabase {
|
|||
public Task FoundTrainAtStation(string stationName, string trainName);
|
||||
public Task OnTrainData(ITrainScrapeResult trainData);
|
||||
public Task OnStationData(IStationScrapeResult stationData);
|
||||
public Task OnItineraries(IReadOnlyList<IItinerary> itineraries);
|
||||
}
|
||||
|
|
33
server/Utils/IAsyncCusorAsyncAdapter.cs
Normal file
33
server/Utils/IAsyncCusorAsyncAdapter.cs
Normal file
|
@ -0,0 +1,33 @@
|
|||
using System.Collections.Generic;
|
||||
using System.Threading.Tasks;
|
||||
using MongoDB.Driver;
|
||||
|
||||
namespace Server.Utils;
|
||||
|
||||
/// <summary>
/// Adapter exposing the <c>MoveNextAsync</c>/<c>Current</c> shape over an
/// <see cref="IAsyncCursor{T}"/>, so the cursor's batches can be walked one
/// element at a time.
/// </summary>
/// <param name="Cursor">Cursor whose batches are flattened into single elements.</param>
public record IAsyncCusorAsyncEnumerator<T>(IAsyncCursor<T> Cursor) {
    // Enumerator over the batch currently being consumed; null until the
    // first batch has been fetched.
    private IEnumerator<T>? enumerator = null;

    /// <summary>
    /// Element the adapter is positioned on. Only valid after
    /// <see cref="MoveNextAsync"/> has returned <c>true</c>.
    /// </summary>
    public T Current => enumerator!.Current;

    /// <summary>
    /// Advances to the next element, fetching further batches from the
    /// cursor when the current one is exhausted.
    /// </summary>
    /// <returns>
    /// <c>true</c> when <see cref="Current"/> points at an element,
    /// <c>false</c> once the cursor has no more batches.
    /// </returns>
    public async Task<bool> MoveNextAsync() {
        // Fast path: the current batch still has elements.
        if (enumerator is not null && enumerator.MoveNext()) {
            return true;
        }

        // Fetch batches until one actually contains an element. A fresh
        // enumerator is positioned BEFORE its first element, so it must be
        // advanced before Current may be read; empty batches are skipped.
        while (await Cursor.MoveNextAsync()) {
            enumerator?.Dispose();
            enumerator = Cursor.Current.GetEnumerator();
            if (enumerator.MoveNext()) {
                return true;
            }
        }

        return false;
    }
}
|
||||
|
||||
/// <summary>
/// Extension methods for <see cref="IAsyncCursor{T}"/>.
/// </summary>
public static class IAsyncCursorExtensions {
    /// <summary>
    /// Wraps <paramref name="cursor"/> in an <see cref="IAsyncCusorAsyncEnumerator{T}"/>.
    /// </summary>
    /// <param name="cursor">Cursor to wrap.</param>
    /// <returns>A new adapter over <paramref name="cursor"/>.</returns>
    public static IAsyncCusorAsyncEnumerator<T> GetAsyncEnumerator<T>(this IAsyncCursor<T> cursor)
        => new IAsyncCusorAsyncEnumerator<T>(cursor);
}
|
Loading…
Add table
Reference in a new issue