diff --git a/scraper/src/Models/Train.cs b/scraper/src/Models/Train.cs
index 9ebdeb7..aa34507 100644
--- a/scraper/src/Models/Train.cs
+++ b/scraper/src/Models/Train.cs
@@ -1,316 +1,318 @@
-using System;
-using System.Collections.Generic;
-using System.Text.Json;
-using System.Text.Json.Serialization;
-using InfoferScraper.Models.Status;
-using InfoferScraper.Models.Train.JsonConverters;
-
-namespace InfoferScraper.Models.Train {
- #region Interfaces
-
- public interface ITrainScrapeResult {
- public string Rank { get; }
-
- public string Number { get; }
-
- ///
- /// Date in the DD.MM.YYYY format
- /// This date is taken as-is from the result.
- ///
- public string Date { get; }
-
- public string Operator { get; }
-
- public IReadOnlyList Groups { get; }
- }
-
- public interface ITrainGroup {
- public ITrainRoute Route { get; }
-
- public ITrainStatus? Status { get; }
- public IReadOnlyList Stations { get; }
- }
-
- public interface ITrainRoute {
- public string From { get; }
- public string To { get; }
- }
-
- public interface ITrainStatus {
- public int Delay { get; }
- public string Station { get; }
- public StatusKind State { get; }
- }
-
- public interface ITrainStopDescription {
- public string Name { get; }
- public int Km { get; }
-
- ///
- /// The time the train waits in the station in seconds
- ///
- public int? StoppingTime { get; }
-
- public string? Platform { get; }
- public ITrainStopArrDep? Arrival { get; }
- public ITrainStopArrDep? Departure { get; }
-
- public IReadOnlyList Notes { get; }
- }
-
- public interface ITrainStopNote {
- public NoteKind Kind { get; }
- }
-
- public interface ITrainStopTrainNumberChangeNote : ITrainStopNote {
- public string Rank { get; }
- public string Number { get; }
- }
-
- public interface ITrainStopDepartsAsNote : ITrainStopNote {
- public string Rank { get; }
- public string Number { get; }
- public DateTimeOffset DepartureDate { get; }
- }
-
- public interface ITrainStopDetachingWagonsNote : ITrainStopNote {
- public string Station { get; }
- }
-
- public interface ITrainStopReceivingWagonsNote : ITrainStopNote {
- public string Station { get; }
- }
-
- public interface ITrainStopArrDep {
- public DateTimeOffset ScheduleTime { get; }
- public IStatus? Status { get; }
- }
-
- #endregion
-
- [JsonConverter(typeof(StatusKindConverter))]
- public enum StatusKind {
- Passing,
- Arrival,
- Departure,
- }
-
- [JsonConverter(typeof(NoteKindConverter))]
- public enum NoteKind {
- TrainNumberChange,
- DetachingWagons,
- ReceivingWagons,
- DepartsAs,
- }
-
- #region Implementations
-
- internal record TrainScrapeResult : ITrainScrapeResult {
- private List ModifyableGroups { get; set; } = new();
- public string Rank { get; set; } = "";
- public string Number { get; set; } = "";
- public string Date { get; set; } = "";
- public string Operator { get; set; } = "";
- public IReadOnlyList Groups => ModifyableGroups.AsReadOnly();
-
- private void AddTrainGroup(ITrainGroup trainGroup) {
- ModifyableGroups.Add(trainGroup);
- }
-
- internal void AddTrainGroup(Action configurator) {
- TrainGroup newTrainGroup = new();
- configurator(newTrainGroup);
- AddTrainGroup(newTrainGroup);
- }
- }
-
- internal record TrainGroup : ITrainGroup {
- private List ModifyableStations { get; set; } = new();
- public ITrainRoute Route { get; init; } = new TrainRoute();
- public ITrainStatus? Status { get; private set; }
- public IReadOnlyList Stations => ModifyableStations.AsReadOnly();
-
- private void AddStopDescription(ITrainStopDescription stopDescription) {
- ModifyableStations.Add(stopDescription);
- }
-
- internal void AddStopDescription(Action configurator) {
- TrainStopDescription newStopDescription = new();
- configurator(newStopDescription);
- AddStopDescription(newStopDescription);
- }
-
- internal void ConfigureRoute(Action configurator) {
- configurator((TrainRoute)Route);
- }
-
- internal void MakeStatus(Action configurator) {
- TrainStatus newStatus = new();
- configurator(newStatus);
- Status = newStatus;
- }
- }
-
- internal record TrainRoute : ITrainRoute {
- public TrainRoute() {
- From = "";
- To = "";
- }
-
- public string From { get; set; }
- public string To { get; set; }
- }
-
- internal record TrainStatus : ITrainStatus {
- public int Delay { get; set; }
- public string Station { get; set; } = "";
- public StatusKind State { get; set; }
- }
-
- internal record TrainStopDescription : ITrainStopDescription {
- private List ModifyableNotes { get; } = new();
- public string Name { get; set; } = "";
- public int Km { get; set; }
- public int? StoppingTime { get; set; }
- public string? Platform { get; set; }
- public ITrainStopArrDep? Arrival { get; private set; }
- public ITrainStopArrDep? Departure { get; private set; }
- public IReadOnlyList Notes => ModifyableNotes.AsReadOnly();
-
- internal void MakeArrival(Action configurator) {
- TrainStopArrDep newArrival = new();
- configurator(newArrival);
- Arrival = newArrival;
- }
-
- internal void MakeDeparture(Action configurator) {
- TrainStopArrDep newDeparture = new();
- configurator(newDeparture);
- Departure = newDeparture;
- }
-
- class DepartsAsNote : ITrainStopDepartsAsNote {
- public NoteKind Kind => NoteKind.DepartsAs;
- public string Rank { get; set; } = "";
- public string Number { get; set; } = "";
- public DateTimeOffset DepartureDate { get; set; }
- }
-
- class TrainNumberChangeNote : ITrainStopTrainNumberChangeNote {
- public NoteKind Kind => NoteKind.TrainNumberChange;
- public string Rank { get; set; } = "";
- public string Number { get; set; } = "";
- }
-
- class ReceivingWagonsNote : ITrainStopReceivingWagonsNote {
- public NoteKind Kind => NoteKind.ReceivingWagons;
- public string Station { get; set; } = "";
- }
-
- class DetachingWagonsNote : ITrainStopReceivingWagonsNote {
- public NoteKind Kind => NoteKind.DetachingWagons;
- public string Station { get; set; } = "";
- }
-
- internal void AddDepartsAsNote(string rank, string number, DateTimeOffset departureDate) {
- ModifyableNotes.Add(new DepartsAsNote { Rank = rank, Number = number, DepartureDate = departureDate });
- }
-
- internal void AddTrainNumberChangeNote(string rank, string number) {
- ModifyableNotes.Add(new TrainNumberChangeNote { Rank = rank, Number = number });
- }
-
- internal void AddReceivingWagonsNote(string station) {
- ModifyableNotes.Add(new ReceivingWagonsNote { Station = station });
- }
-
- internal void AddDetachingWagonsNote(string station) {
- ModifyableNotes.Add(new DetachingWagonsNote { Station = station });
- }
- }
-
- public record TrainStopArrDep : ITrainStopArrDep {
- public DateTimeOffset ScheduleTime { get; set; }
- public IStatus? Status { get; private set; }
-
- internal void MakeStatus(Action configurator) {
- Status.Status newStatus = new();
- configurator(newStatus);
- Status = newStatus;
- }
- }
-
- #endregion
-
- #region JSON Converters
-
- namespace JsonConverters {
- internal class StatusKindConverter : JsonConverterFactory {
- public override bool CanConvert(Type typeToConvert) {
- return typeToConvert == typeof(StatusKind);
- }
-
- public override JsonConverter? CreateConverter(Type typeToConvert, JsonSerializerOptions options) {
- return new Converter();
- }
-
- private class Converter : JsonConverter {
- public override StatusKind Read(
- ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options
- ) {
- return reader.GetString() switch {
- "arrival" => StatusKind.Arrival,
- "departure" => StatusKind.Departure,
- "passing" => StatusKind.Passing,
- _ => throw new NotImplementedException()
- };
- }
-
- public override void Write(Utf8JsonWriter writer, StatusKind value, JsonSerializerOptions options) {
- writer.WriteStringValue(value switch {
- StatusKind.Passing => "passing",
- StatusKind.Arrival => "arrival",
- StatusKind.Departure => "departure",
- _ => throw new NotImplementedException()
- });
- }
- }
- }
-
- internal class NoteKindConverter : JsonConverterFactory {
- public override bool CanConvert(Type typeToConvert) {
- return typeToConvert == typeof(NoteKind);
- }
-
- public override JsonConverter? CreateConverter(Type typeToConvert, JsonSerializerOptions options) {
- return new Converter();
- }
-
- private class Converter : JsonConverter {
- public override NoteKind Read(
- ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options
- ) {
- return reader.GetString() switch {
- "departsAs" => NoteKind.DepartsAs,
- "trainNumberChange" => NoteKind.TrainNumberChange,
- "receivingWagons" => NoteKind.ReceivingWagons,
- "detachingWagons" => NoteKind.DetachingWagons,
- _ => throw new NotImplementedException()
- };
- }
-
- public override void Write(Utf8JsonWriter writer, NoteKind value, JsonSerializerOptions options) {
- writer.WriteStringValue(value switch {
- NoteKind.DepartsAs => "departsAs",
- NoteKind.TrainNumberChange => "trainNumberChange",
- NoteKind.DetachingWagons => "detachingWagons",
- NoteKind.ReceivingWagons => "receivingWagons",
- _ => throw new NotImplementedException()
- });
- }
- }
- }
- }
-
- #endregion
-}
+using System;
+using System.Collections.Generic;
+using System.Text.Json;
+using System.Text.Json.Serialization;
+using InfoferScraper.Models.Status;
+using InfoferScraper.Models.Train.JsonConverters;
+
+namespace InfoferScraper.Models.Train {
+ #region Interfaces
+
+ public interface ITrainScrapeResult { // Read-only view of the data scraped for one train.
+     public string Rank { get; } // Train rank, kept separately from the number.
+     // Train number, kept as text rather than as an integer.
+     public string Number { get; }
+
+     /// <summary>
+     /// Date in the DD.MM.YYYY format
+     /// This date is taken as-is from the result.
+     /// </summary>
+     public string Date { get; }
+     // Operator of the train, as text.
+     public string Operator { get; }
+     /// <summary>Groups of the train; each group carries its own route, status and stops.</summary>
+     public IReadOnlyList<ITrainGroup> Groups { get; }
+ }
+
+ public interface ITrainGroup { // One group of a train: a route, an optional status and its stops.
+     public ITrainRoute Route { get; }
+     // Null when no status has been set for the group.
+     public ITrainStatus? Status { get; }
+     public IReadOnlyList<ITrainStopDescription> Stations { get; }
+ }
+
+ public interface ITrainRoute { // Endpoints of a train group's route.
+ public string From { get; } // Start of the route.
+ public string To { get; } // End of the route.
+ }
+
+ public interface ITrainStatus { // Status of a train group: delay, station and kind of event.
+ public int Delay { get; } // Minutes; the scraper stores early running as a negative value.
+ public string Station { get; } // Station the status refers to.
+ public StatusKind State { get; } // Whether the train passed, arrived at or departed from that station.
+ }
+
+ public interface ITrainStopDescription { // One stop (station) along a train group's route.
+     public string Name { get; }
+     public string LinkName { get; } // Last path segment of the station link scraped next to the name.
+     public int Km { get; }
+
+     /// <summary>
+     /// The time the train waits in the station in seconds
+     /// </summary>
+     public int? StoppingTime { get; }
+     // Platform, Arrival and Departure are nullable: null when not set for this stop.
+     public string? Platform { get; }
+     public ITrainStopArrDep? Arrival { get; }
+     public ITrainStopArrDep? Departure { get; }
+     /// <summary>Notes attached to this stop, in the order they were added.</summary>
+     public IReadOnlyList<ITrainStopNote> Notes { get; }
+ }
+
+ public interface ITrainStopNote { // Common base of every note attached to a stop.
+ public NoteKind Kind { get; } // Identifies the kind of note; each implementation returns a fixed value.
+ }
+
+ public interface ITrainStopTrainNumberChangeNote : ITrainStopNote { // Note carrying a rank and number for a train number change.
+ public string Rank { get; }
+ public string Number { get; }
+ }
+
+ public interface ITrainStopDepartsAsNote : ITrainStopNote { // Note carrying the rank, number and date the train departs as.
+ public string Rank { get; }
+ public string Number { get; }
+ public DateTimeOffset DepartureDate { get; } // Departure date, including its UTC offset.
+ }
+
+ public interface ITrainStopDetachingWagonsNote : ITrainStopNote { // Note about wagons being detached at this stop.
+ public string Station { get; } // Station named in the note.
+ }
+
+ public interface ITrainStopReceivingWagonsNote : ITrainStopNote { // Note about wagons being received at this stop.
+ public string Station { get; } // Station named in the note.
+ }
+
+ public interface ITrainStopArrDep { // Arrival or departure information for a stop.
+ public DateTimeOffset ScheduleTime { get; } // Scheduled time, including its UTC offset.
+ public IStatus? Status { get; } // Null when no status has been set.
+ }
+
+ #endregion
+
+ [JsonConverter(typeof(StatusKindConverter))] // Written to and read from JSON as "passing", "arrival" or "departure".
+ public enum StatusKind {
+ Passing,
+ Arrival,
+ Departure,
+ }
+
+ [JsonConverter(typeof(NoteKindConverter))] // Written to and read from JSON as the camelCase form of the member name.
+ public enum NoteKind {
+ TrainNumberChange,
+ DetachingWagons,
+ ReceivingWagons,
+ DepartsAs,
+ }
+
+ #region Implementations
+
+ internal record TrainScrapeResult : ITrainScrapeResult { // Mutable implementation of ITrainScrapeResult.
+     private List<ITrainGroup> ModifyableGroups { get; set; } = new();
+     public string Rank { get; set; } = "";
+     public string Number { get; set; } = "";
+     public string Date { get; set; } = "";
+     public string Operator { get; set; } = "";
+     public IReadOnlyList<ITrainGroup> Groups => ModifyableGroups.AsReadOnly();
+     // Appends an already-built group to the backing list.
+     private void AddTrainGroup(ITrainGroup trainGroup) {
+         ModifyableGroups.Add(trainGroup);
+     }
+     /// <summary>Creates an empty group, lets <paramref name="configurator"/> fill it, then appends it.</summary>
+     internal void AddTrainGroup(Action<TrainGroup> configurator) {
+         TrainGroup newTrainGroup = new();
+         configurator(newTrainGroup);
+         AddTrainGroup(newTrainGroup); // Reached only if the configurator did not throw.
+     }
+ }
+
+ internal record TrainGroup : ITrainGroup { // Mutable implementation of ITrainGroup.
+     private List<ITrainStopDescription> ModifyableStations { get; set; } = new();
+     public ITrainRoute Route { get; init; } = new TrainRoute();
+     public ITrainStatus? Status { get; private set; }
+     public IReadOnlyList<ITrainStopDescription> Stations => ModifyableStations.AsReadOnly();
+     // Appends an already-built stop to the backing list.
+     private void AddStopDescription(ITrainStopDescription stopDescription) {
+         ModifyableStations.Add(stopDescription);
+     }
+     /// <summary>Creates an empty stop, lets <paramref name="configurator"/> fill it, then appends it.</summary>
+     internal void AddStopDescription(Action<TrainStopDescription> configurator) {
+         TrainStopDescription newStopDescription = new();
+         configurator(newStopDescription);
+         AddStopDescription(newStopDescription);
+     }
+     /// <summary>Passes the current route to <paramref name="configurator"/> so it can be edited in place.</summary>
+     internal void ConfigureRoute(Action<TrainRoute> configurator) {
+         configurator((TrainRoute)Route); // Throws InvalidCastException if Route was initialised with another ITrainRoute.
+     }
+     /// <summary>Builds a new status via <paramref name="configurator"/> and replaces the current one.</summary>
+     internal void MakeStatus(Action<TrainStatus> configurator) {
+         TrainStatus newStatus = new();
+         configurator(newStatus);
+         Status = newStatus; // Assigned only after the configurator returned without throwing.
+     }
+ }
+
+ internal record TrainRoute : ITrainRoute {
+     // Both endpoints start out as empty strings (never null) and are
+     // overwritten later through the public setters.
+     /// <summary>Start of the route.</summary>
+     public string From { get; set; } = "";
+
+     /// <summary>End of the route.</summary>
+     public string To { get; set; } = "";
+ }
+
+ internal record TrainStatus : ITrainStatus { // Mutable implementation of ITrainStatus.
+ public int Delay { get; set; }
+ public string Station { get; set; } = ""; // Defaults to the empty string rather than null.
+ public StatusKind State { get; set; } // Defaults to StatusKind.Passing, the first enum member.
+ }
+
+ internal record TrainStopDescription : ITrainStopDescription { // Mutable implementation of ITrainStopDescription.
+     private List<ITrainStopNote> ModifyableNotes { get; } = new();
+     public string Name { get; set; } = "";
+     public string LinkName { get; set; } = "";
+     public int Km { get; set; }
+     public int? StoppingTime { get; set; }
+     public string? Platform { get; set; }
+     public ITrainStopArrDep? Arrival { get; private set; }
+     public ITrainStopArrDep? Departure { get; private set; }
+     public IReadOnlyList<ITrainStopNote> Notes => ModifyableNotes.AsReadOnly();
+     /// <summary>Builds a new arrival via <paramref name="configurator"/>; Arrival is set only if it returns without throwing.</summary>
+     internal void MakeArrival(Action<TrainStopArrDep> configurator) {
+         TrainStopArrDep newArrival = new();
+         configurator(newArrival);
+         Arrival = newArrival;
+     }
+     /// <summary>Builds a new departure via <paramref name="configurator"/>; Departure is set only if it returns without throwing.</summary>
+     internal void MakeDeparture(Action<TrainStopArrDep> configurator) {
+         TrainStopArrDep newDeparture = new();
+         configurator(newDeparture);
+         Departure = newDeparture;
+     }
+     // Private note implementations; code outside this record only sees them through the note interfaces.
+     class DepartsAsNote : ITrainStopDepartsAsNote {
+         public NoteKind Kind => NoteKind.DepartsAs;
+         public string Rank { get; set; } = "";
+         public string Number { get; set; } = "";
+         public DateTimeOffset DepartureDate { get; set; }
+     }
+
+     class TrainNumberChangeNote : ITrainStopTrainNumberChangeNote {
+         public NoteKind Kind => NoteKind.TrainNumberChange;
+         public string Rank { get; set; } = "";
+         public string Number { get; set; } = "";
+     }
+
+     class ReceivingWagonsNote : ITrainStopReceivingWagonsNote {
+         public NoteKind Kind => NoteKind.ReceivingWagons;
+         public string Station { get; set; } = "";
+     }
+     // Implements the detaching interface so that type checks agree with Kind == DetachingWagons.
+     class DetachingWagonsNote : ITrainStopDetachingWagonsNote {
+         public NoteKind Kind => NoteKind.DetachingWagons;
+         public string Station { get; set; } = "";
+     }
+     // Each Add*Note helper appends one note of the matching kind to the end of the notes list.
+     internal void AddDepartsAsNote(string rank, string number, DateTimeOffset departureDate) {
+         ModifyableNotes.Add(new DepartsAsNote { Rank = rank, Number = number, DepartureDate = departureDate });
+     }
+
+     internal void AddTrainNumberChangeNote(string rank, string number) {
+         ModifyableNotes.Add(new TrainNumberChangeNote { Rank = rank, Number = number });
+     }
+
+     internal void AddReceivingWagonsNote(string station) {
+         ModifyableNotes.Add(new ReceivingWagonsNote { Station = station });
+     }
+
+     internal void AddDetachingWagonsNote(string station) {
+         ModifyableNotes.Add(new DetachingWagonsNote { Station = station });
+     }
+ }
+
+ public record TrainStopArrDep : ITrainStopArrDep { // Mutable implementation of ITrainStopArrDep.
+     public DateTimeOffset ScheduleTime { get; set; }
+     public IStatus? Status { get; private set; }
+     /// <summary>Builds a new status via <paramref name="configurator"/> and replaces the current one.</summary>
+     internal void MakeStatus(Action<Status.Status> configurator) {
+         Status.Status newStatus = new(); // In type position "Status.Status" names the class in the Status namespace, not the property.
+         configurator(newStatus);
+         Status = newStatus; // Assigned only after the configurator returned without throwing.
+     }
+ }
+
+ #endregion
+
+ #region JSON Converters
+
+ namespace JsonConverters {
+ internal class StatusKindConverter : JsonConverterFactory { // Factory for the converter between StatusKind and JSON strings.
+     public override bool CanConvert(Type typeToConvert) {
+         return typeToConvert == typeof(StatusKind);
+     }
+     // A fresh converter instance is returned on every call.
+     public override JsonConverter? CreateConverter(Type typeToConvert, JsonSerializerOptions options) {
+         return new Converter();
+     }
+     // Maps StatusKind to and from the lowercase strings "passing", "arrival" and "departure".
+     private class Converter : JsonConverter<StatusKind> {
+         public override StatusKind Read(
+             ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options
+         ) {
+             return reader.GetString() switch {
+                 "arrival" => StatusKind.Arrival,
+                 "departure" => StatusKind.Departure,
+                 "passing" => StatusKind.Passing,
+                 _ => throw new NotImplementedException() // Any other string, or JSON null, is rejected.
+             };
+         }
+
+         public override void Write(Utf8JsonWriter writer, StatusKind value, JsonSerializerOptions options) {
+             writer.WriteStringValue(value switch {
+                 StatusKind.Passing => "passing",
+                 StatusKind.Arrival => "arrival",
+                 StatusKind.Departure => "departure",
+                 _ => throw new NotImplementedException() // Values outside the declared members are rejected.
+             });
+         }
+     }
+ }
+
+ internal class NoteKindConverter : JsonConverterFactory { // Factory for the converter between NoteKind and JSON strings.
+     public override bool CanConvert(Type typeToConvert) {
+         return typeToConvert == typeof(NoteKind);
+     }
+     // A fresh converter instance is returned on every call.
+     public override JsonConverter? CreateConverter(Type typeToConvert, JsonSerializerOptions options) {
+         return new Converter();
+     }
+     // Maps NoteKind to and from the camelCase form of each member name.
+     private class Converter : JsonConverter<NoteKind> {
+         public override NoteKind Read(
+             ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options
+         ) {
+             return reader.GetString() switch {
+                 "departsAs" => NoteKind.DepartsAs,
+                 "trainNumberChange" => NoteKind.TrainNumberChange,
+                 "receivingWagons" => NoteKind.ReceivingWagons,
+                 "detachingWagons" => NoteKind.DetachingWagons,
+                 _ => throw new NotImplementedException() // Any other string, or JSON null, is rejected.
+             };
+         }
+
+         public override void Write(Utf8JsonWriter writer, NoteKind value, JsonSerializerOptions options) {
+             writer.WriteStringValue(value switch {
+                 NoteKind.DepartsAs => "departsAs",
+                 NoteKind.TrainNumberChange => "trainNumberChange",
+                 NoteKind.DetachingWagons => "detachingWagons",
+                 NoteKind.ReceivingWagons => "receivingWagons",
+                 _ => throw new NotImplementedException() // Values outside the declared members are rejected.
+             });
+         }
+     }
+ }
+ }
+
+ #endregion
+}
diff --git a/scraper/src/Scrapers/Train.cs b/scraper/src/Scrapers/Train.cs
index 4c24409..57cd25b 100644
--- a/scraper/src/Scrapers/Train.cs
+++ b/scraper/src/Scrapers/Train.cs
@@ -1,239 +1,244 @@
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Net;
-using System.Net.Http;
-using System.Text.RegularExpressions;
-using System.Threading.Tasks;
-using AngleSharp;
-using AngleSharp.Dom;
-using AngleSharp.Html.Dom;
-using Flurl;
-using InfoferScraper.Models.Train;
-using NodaTime;
-using NodaTime.Extensions;
-using scraper.Exceptions;
-
-namespace InfoferScraper.Scrapers {
- public static class TrainScraper {
- private const string BaseUrl = "https://mersultrenurilor.infofer.ro/ro-RO/";
- private static readonly Regex TrainInfoRegex = new(@"^([A-Z-]+)\s([0-9]+)\sîn\s([0-9.]+)$");
- private static readonly Regex OperatorRegex = new(@"^Operat\sde\s(.+)$");
-
- private static readonly Regex RouteRegex =
- new(@$"^Parcurs\stren\s([{Utils.RoLetters} ]+)[-–]([{Utils.RoLetters}\s]+)$");
-
- private static readonly Regex SlRegex =
- new(
- @"^(?:Fără|([0-9]+)\smin)\s(întârziere|mai\sdevreme)\sla\s(trecerea\sfără\soprire\sprin|sosirea\sîn|plecarea\sdin)\s(.+)\.$");
-
- private static readonly Dictionary SlStateMap = new() {
- { 't', StatusKind.Passing },
- { 's', StatusKind.Arrival },
- { 'p', StatusKind.Departure },
- };
-
- private static readonly Regex KmRegex = new(@"^km\s([0-9]+)$");
- private static readonly Regex StoppingTimeRegex = new(@"^([0-9]+)\s(min|sec)\soprire$");
- private static readonly Regex PlatformRegex = new(@"^linia\s(.+)$");
-
- private static readonly Regex StationArrdepStatusRegex =
- new(@"^(?:(la timp)|(?:((?:\+|-)[0-9]+) min \((?:(?:întârziere)|(?:mai devreme))\)))(\*?)$");
-
- private static readonly Regex TrainNumberChangeNoteRegex =
- new(@"^Trenul își schimbă numărul în\s([A-Z-]+)\s([0-9]+)$");
- private static readonly Regex DepartsAsNoteRegex =
- new(@"^Trenul pleacă cu numărul\s([A-Z-]+)\s([0-9]+)\sîn\s([0-9]{2}).([0-9]{2}).([0-9]{4})$");
- private static readonly Regex ReceivingWagonsNoteRegex =
- new(@"^Trenul primește vagoane de la\s(.+)\.$");
- private static readonly Regex DetachingWagonsNoteRegex =
- new(@"^Trenul detașează vagoane pentru stația\s(.+)\.$");
-
- private static readonly DateTimeZone BucharestTz = DateTimeZoneProviders.Tzdb["Europe/Bucharest"];
-
- private static readonly CookieContainer CookieContainer = new();
- private static readonly HttpClient HttpClient = new(new HttpClientHandler {
- CookieContainer = CookieContainer,
- UseCookies = true,
- }) {
- BaseAddress = new Uri(BaseUrl),
- DefaultRequestVersion = new Version(2, 0),
- };
-
- public static async Task Scrape(string trainNumber, DateTimeOffset? dateOverride = null) {
- var dateOverrideInstant = dateOverride?.ToInstant().InZone(BucharestTz);
- dateOverride = dateOverrideInstant?.ToDateTimeOffset();
- TrainScrapeResult result = new();
-
- var asConfig = Configuration.Default;
- var asContext = BrowsingContext.New(asConfig);
-
- var firstUrl = "Tren"
- .AppendPathSegment(trainNumber);
- if (dateOverride != null) {
- firstUrl = firstUrl.SetQueryParam("Date", $"{dateOverride:d.MM.yyyy}");
- }
- var firstResponse = await HttpClient.GetStringAsync(firstUrl);
- var firstDocument = await asContext.OpenAsync(req => req.Content(firstResponse));
- var firstForm = firstDocument.GetElementById("form-search")!;
-
- var firstResult = firstForm
- .QuerySelectorAll("input")
- .Where(elem => elem.Name != null)
- .ToDictionary(elem => elem.Name!, elem => elem.Value);
-
- var secondUrl = "".AppendPathSegments("Trains", "TrainsResult");
- var secondResponse = await HttpClient.PostAsync(
- secondUrl,
-#pragma warning disable CS8620
- new FormUrlEncodedContent(firstResult)
-#pragma warning restore CS8620
- );
- var secondResponseContent = await secondResponse.Content.ReadAsStringAsync();
- var secondDocument = await asContext.OpenAsync(
- req => req.Content(secondResponseContent)
- );
-
- var (trainInfoDiv, (_, (_, (resultsDiv, _)))) = secondDocument
- .QuerySelectorAll("body > div");
- if (trainInfoDiv == null) {
- return null;
- }
- if (resultsDiv == null) {
- throw new TrainNotThisDayException();
- }
- trainInfoDiv = trainInfoDiv.QuerySelectorAll(":scope > div > div").First();
-
- (result.Rank, (result.Number, (result.Date, _))) = (TrainInfoRegex.Match(
- trainInfoDiv.QuerySelector(":scope > h2")!.Text().WithCollapsedSpaces()
- ).Groups as IEnumerable).Select(group => group.Value).Skip(1);
- var (scrapedDateD, (scrapedDateM, (scrapedDateY, _))) = result.Date
- .Split('.')
- .Select(int.Parse);
- var date = new DateTime(scrapedDateY, scrapedDateM, scrapedDateD);
-
- result.Operator = (OperatorRegex.Match(
- trainInfoDiv.QuerySelector(":scope > p")!.Text().WithCollapsedSpaces()
- ).Groups as IEnumerable).Skip(1).First().Value;
-
- foreach (var groupDiv in resultsDiv.QuerySelectorAll(":scope > div")) {
- result.AddTrainGroup(group => {
- var statusDiv = groupDiv.QuerySelectorAll(":scope > div").First();
- var routeText = statusDiv.QuerySelector(":scope > h4")!.Text().WithCollapsedSpaces();
- group.ConfigureRoute(route => {
- (route.From, (route.To, _)) = (RouteRegex.Match(routeText).Groups as IEnumerable).Skip(1)
- .Select(group => group.Value);
- });
-
- try {
- var statusLineMatch =
- SlRegex.Match(statusDiv.QuerySelector(":scope > div")!.Text().WithCollapsedSpaces());
- var (slmDelay, (slmLate, (slmArrival, (slmStation, _)))) =
- (statusLineMatch.Groups as IEnumerable).Skip(1).Select(group => group.Value);
- group.MakeStatus(status => {
- status.Delay = string.IsNullOrEmpty(slmDelay) ? 0 :
- slmLate == "întârziere" ? int.Parse(slmDelay) : -int.Parse(slmDelay);
- status.Station = slmStation;
- status.State = SlStateMap[slmArrival[0]];
- });
- }
- catch {
- // ignored
- }
-
- Utils.DateTimeSequencer dtSeq = new(date.Year, date.Month, date.Day);
- var stations = statusDiv.QuerySelectorAll(":scope > ul > li");
- foreach (var station in stations) {
- group.AddStopDescription(stopDescription => {
- var (left, (middle, (right, _))) = station
- .QuerySelectorAll(":scope > div > div");
- var (stopDetails, (stopNotes, _)) = middle
- .QuerySelectorAll(":scope > div > div > div");
- stopDescription.Name = stopDetails
- .QuerySelectorAll(":scope > div")[0]
- .Text()
- .WithCollapsedSpaces();
- var scrapedKm = stopDetails
- .QuerySelectorAll(":scope > div")[1]
- .Text()
- .WithCollapsedSpaces();
- stopDescription.Km = int.Parse(
- (KmRegex.Match(scrapedKm).Groups as IEnumerable).Skip(1).First().Value
- );
- var scrapedStoppingTime = stopDetails
- .QuerySelectorAll(":scope > div")[2]
- .Text()
- .WithCollapsedSpaces();
- if (!string.IsNullOrEmpty(scrapedStoppingTime)) {
- var (stValue, (stMinsec, _)) =
- (StoppingTimeRegex.Match(scrapedStoppingTime).Groups as IEnumerable)
- .Skip(1)
- .Select(group => group.Value);
- stopDescription.StoppingTime = int.Parse(stValue);
- if (stMinsec == "min") stopDescription.StoppingTime *= 60;
- }
-
- var scrapedPlatform = stopDetails
- .QuerySelectorAll(":scope > div")[3]
- .Text()
- .WithCollapsedSpaces();
- if (!string.IsNullOrEmpty(scrapedPlatform))
- stopDescription.Platform = PlatformRegex.Match(scrapedPlatform).Groups[1].Value;
-
- void ScrapeTime(IElement element, ref TrainStopArrDep arrDep) {
- var parts = element.QuerySelectorAll(":scope > div > div > div");
- if (parts.Length == 0) throw new OperationCanceledException();
- var time = parts[0];
- var scrapedTime = time.Text().WithCollapsedSpaces();
- var (stHour, (stMin, _)) = scrapedTime.Split(':').Select(int.Parse);
- arrDep.ScheduleTime = BucharestTz.AtLeniently(dtSeq.Next(stHour, stMin).ToLocalDateTime())
- .ToDateTimeOffset();
-
- if (parts.Length < 2) return;
-
- var statusElement = parts[1];
- var (onTime, (delay, (approx, _))) = (StationArrdepStatusRegex.Match(
- statusElement.Text().WithCollapsedSpaces(replaceWith: " ")
- ).Groups as IEnumerable).Skip(1).Select(group => group.Value);
- arrDep.MakeStatus(status => {
- status.Delay = string.IsNullOrEmpty(onTime) ? int.Parse(delay) : 0;
- status.Real = string.IsNullOrEmpty(approx);
- });
- }
-
- try {
- stopDescription.MakeArrival(arrival => { ScrapeTime(left, ref arrival); });
- }
- catch (OperationCanceledException) { }
-
- try {
- stopDescription.MakeDeparture(departure => { ScrapeTime(right, ref departure); });
- }
- catch (OperationCanceledException) { }
-
- foreach (var noteDiv in stopNotes.QuerySelectorAll(":scope > div > div")) {
- var noteText = noteDiv.Text().WithCollapsedSpaces();
- Match trainNumberChangeMatch, departsAsMatch, detachingWagons, receivingWagons;
- if ((trainNumberChangeMatch = TrainNumberChangeNoteRegex.Match(noteText)).Success) {
- stopDescription.AddTrainNumberChangeNote(trainNumberChangeMatch.Groups[1].Value, trainNumberChangeMatch.Groups[2].Value);
- }
- else if ((departsAsMatch = DepartsAsNoteRegex.Match(noteText)).Success) {
- var groups = departsAsMatch.Groups;
- var departureDate = BucharestTz.AtStrictly(new(int.Parse(groups[5].Value), int.Parse(groups[4].Value), int.Parse(groups[3].Value), 0, 0));
- stopDescription.AddDepartsAsNote(groups[1].Value, groups[2].Value, departureDate.ToDateTimeOffset());
- }
- else if ((detachingWagons = DetachingWagonsNoteRegex.Match(noteText)).Success) {
- stopDescription.AddDetachingWagonsNote(detachingWagons.Groups[1].Value);
- }
- else if ((receivingWagons = ReceivingWagonsNoteRegex.Match(noteText)).Success) {
- stopDescription.AddReceivingWagonsNote(receivingWagons.Groups[1].Value);
- }
- }
- });
- }
- });
- }
- return result;
- }
- }
-} // namespace
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Net;
+using System.Net.Http;
+using System.Text.RegularExpressions;
+using System.Threading.Tasks;
+using AngleSharp;
+using AngleSharp.Dom;
+using AngleSharp.Html.Dom;
+using Flurl;
+using InfoferScraper.Models.Train;
+using NodaTime;
+using NodaTime.Extensions;
+using scraper.Exceptions;
+
+namespace InfoferScraper.Scrapers {
+ public static class TrainScraper {
+ private const string BaseUrl = "https://mersultrenurilor.infofer.ro/ro-RO/";
+ private static readonly Regex TrainInfoRegex = new(@"^([A-Z-]+)\s([0-9]+)\sîn\s([0-9.]+)$");
+ private static readonly Regex OperatorRegex = new(@"^Operat\sde\s(.+)$");
+
+ private static readonly Regex RouteRegex =
+ new(@$"^Parcurs\stren\s([{Utils.RoLetters} ]+)[-–]([{Utils.RoLetters}\s]+)$");
+
+ private static readonly Regex SlRegex =
+ new(
+ @"^(?:Fără|([0-9]+)\smin)\s(întârziere|mai\sdevreme)\sla\s(trecerea\sfără\soprire\sprin|sosirea\sîn|plecarea\sdin)\s(.+)\.$");
+
+ private static readonly Dictionary SlStateMap = new() {
+ { 't', StatusKind.Passing },
+ { 's', StatusKind.Arrival },
+ { 'p', StatusKind.Departure },
+ };
+
+ private static readonly Regex KmRegex = new(@"^km\s([0-9]+)$");
+ private static readonly Regex StoppingTimeRegex = new(@"^([0-9]+)\s(min|sec)\soprire$");
+ private static readonly Regex PlatformRegex = new(@"^linia\s(.+)$");
+
+ private static readonly Regex StationArrdepStatusRegex =
+ new(@"^(?:(la timp)|(?:((?:\+|-)[0-9]+) min \((?:(?:întârziere)|(?:mai devreme))\)))(\*?)$");
+
+ private static readonly Regex TrainNumberChangeNoteRegex =
+ new(@"^Trenul își schimbă numărul în\s([A-Z-]+)\s([0-9]+)$");
+ private static readonly Regex DepartsAsNoteRegex =
+ new(@"^Trenul pleacă cu numărul\s([A-Z-]+)\s([0-9]+)\sîn\s([0-9]{2}).([0-9]{2}).([0-9]{4})$");
+ private static readonly Regex ReceivingWagonsNoteRegex =
+ new(@"^Trenul primește vagoane de la\s(.+)\.$");
+ private static readonly Regex DetachingWagonsNoteRegex =
+ new(@"^Trenul detașează vagoane pentru stația\s(.+)\.$");
+
+ private static readonly DateTimeZone BucharestTz = DateTimeZoneProviders.Tzdb["Europe/Bucharest"];
+
+ private static readonly CookieContainer CookieContainer = new();
+ private static readonly HttpClient HttpClient = new(new HttpClientHandler {
+ CookieContainer = CookieContainer,
+ UseCookies = true,
+ }) {
+ BaseAddress = new Uri(BaseUrl),
+ DefaultRequestVersion = new Version(2, 0),
+ };
+
+ /// <summary>
+ /// Scrapes one train: loads the "Tren/{trainNumber}" page, re-posts the inputs of its
+ /// "form-search" form to "Trains/TrainsResult" and parses the HTML that comes back.
+ /// </summary>
+ /// <param name="trainNumber">Train number, appended as a path segment to "Tren".</param>
+ /// <param name="dateOverride">Optional date; converted to BucharestTz and sent as the "Date" query parameter.</param>
+ /// <returns>The populated result, or null when the first "body > div" element is missing.</returns>
+ /// <exception cref="TrainNotThisDayException">The train info div exists but the results div does not.</exception>
+ public static async Task Scrape(string trainNumber, DateTimeOffset? dateOverride = null) {
+     // Re-express the requested moment in the Bucharest zone before its date part is formatted.
+     dateOverride = dateOverride?.ToInstant().InZone(BucharestTz).ToDateTimeOffset();
+     TrainScrapeResult result = new();
+     var asConfig = Configuration.Default;
+     var asContext = BrowsingContext.New(asConfig);
+
+     var firstUrl = "Tren".AppendPathSegment(trainNumber);
+     if (dateOverride != null) {
+         // Invariant culture: the formatted date must not depend on the calendar of the host's current culture.
+         firstUrl = firstUrl.SetQueryParam("Date", dateOverride.Value.ToString("d.MM.yyyy", System.Globalization.CultureInfo.InvariantCulture));
+     }
+     var firstResponse = await HttpClient.GetStringAsync(firstUrl);
+     var firstDocument = await asContext.OpenAsync(req => req.Content(firstResponse));
+     var firstForm = firstDocument.GetElementById("form-search")!;
+
+     // Collect every named input of the search form so it can be posted back as form data.
+     var firstResult = firstForm
+         .QuerySelectorAll("input")
+         .Where(elem => elem.Name != null)
+         .ToDictionary(elem => elem.Name!, elem => elem.Value);
+
+     var secondUrl = "".AppendPathSegments("Trains", "TrainsResult");
+     // The response is only needed until its body has been read; the using declaration disposes it on exit.
+     using var secondResponse = await HttpClient.PostAsync(
+         secondUrl,
+#pragma warning disable CS8620
+         new FormUrlEncodedContent(firstResult)
+#pragma warning restore CS8620
+     );
+     var secondResponseContent = await secondResponse.Content.ReadAsStringAsync();
+     var secondDocument = await asContext.OpenAsync(req => req.Content(secondResponseContent));
+
+     var (trainInfoDiv, (_, (_, (resultsDiv, _)))) = secondDocument.QuerySelectorAll("body > div");
+     if (trainInfoDiv == null) {
+         return null;
+     }
+     if (resultsDiv == null) {
+         throw new TrainNotThisDayException();
+     }
+     trainInfoDiv = trainInfoDiv.QuerySelectorAll(":scope > div > div").First();
+
+     // Capture groups of the heading, in order: rank, number, date (group 0, the whole match, is skipped).
+     (result.Rank, (result.Number, (result.Date, _))) = (TrainInfoRegex.Match(
+         trainInfoDiv.QuerySelector(":scope > h2")!.Text().WithCollapsedSpaces()
+     ).Groups as IEnumerable).Select(g => g.Value).Skip(1);
+     var (scrapedDateD, (scrapedDateM, (scrapedDateY, _))) = result.Date.Split('.').Select(int.Parse);
+     var date = new DateTime(scrapedDateY, scrapedDateM, scrapedDateD);
+
+     result.Operator = (OperatorRegex.Match(
+         trainInfoDiv.QuerySelector(":scope > p")!.Text().WithCollapsedSpaces()
+     ).Groups as IEnumerable).Skip(1).First().Value;
+
+     foreach (var groupDiv in resultsDiv.QuerySelectorAll(":scope > div")) {
+         result.AddTrainGroup(group => {
+             var statusDiv = groupDiv.QuerySelectorAll(":scope > div").First();
+             var routeText = statusDiv.QuerySelector(":scope > h4")!.Text().WithCollapsedSpaces();
+             group.ConfigureRoute(route => {
+                 (route.From, (route.To, _)) = (RouteRegex.Match(routeText).Groups as IEnumerable).Skip(1)
+                     .Select(g => g.Value);
+             });
+
+             // Best effort: any exception raised while parsing the status line is swallowed on purpose.
+             try {
+                 var statusLineMatch =
+                     SlRegex.Match(statusDiv.QuerySelector(":scope > div")!.Text().WithCollapsedSpaces());
+                 var (slmDelay, (slmLate, (slmArrival, (slmStation, _)))) =
+                     (statusLineMatch.Groups as IEnumerable).Skip(1).Select(g => g.Value);
+                 group.MakeStatus(status => {
+                     // No number means 0; the number is positive for "întârziere" and negated otherwise.
+                     status.Delay = string.IsNullOrEmpty(slmDelay) ? 0 :
+                         slmLate == "întârziere" ? int.Parse(slmDelay) : -int.Parse(slmDelay);
+                     status.Station = slmStation;
+                     status.State = SlStateMap[slmArrival[0]];
+                 });
+             }
+             catch {
+                 // ignored
+             }
+
+             Utils.DateTimeSequencer dtSeq = new(date.Year, date.Month, date.Day);
+             var stations = statusDiv.QuerySelectorAll(":scope > ul > li");
+             foreach (var station in stations) {
+                 group.AddStopDescription(stopDescription => {
+                     var (left, (middle, (right, _))) = station.QuerySelectorAll(":scope > div > div");
+                     var (stopDetails, (stopNotes, _)) = middle.QuerySelectorAll(":scope > div > div > div");
+                     // Query the detail cells once; by index: 0 name (with link), 1 km, 2 stopping time, 3 platform.
+                     var detailCells = stopDetails.QuerySelectorAll(":scope > div");
+                     stopDescription.Name = detailCells[0].Text().WithCollapsedSpaces();
+                     stopDescription.LinkName = new Flurl.Url(detailCells[0]
+                         .QuerySelector(":scope a")
+                         .Attributes["href"]
+                         .Value).PathSegments.Last();
+                     var scrapedKm = detailCells[1].Text().WithCollapsedSpaces();
+                     stopDescription.Km = int.Parse(
+                         (KmRegex.Match(scrapedKm).Groups as IEnumerable).Skip(1).First().Value
+                     );
+                     var scrapedStoppingTime = detailCells[2].Text().WithCollapsedSpaces();
+                     if (!string.IsNullOrEmpty(scrapedStoppingTime)) {
+                         var (stValue, (stMinsec, _)) =
+                             (StoppingTimeRegex.Match(scrapedStoppingTime).Groups as IEnumerable)
+                             .Skip(1)
+                             .Select(g => g.Value);
+                         // StoppingTime is expressed in seconds, so a value given in "min" is multiplied by 60.
+                         stopDescription.StoppingTime = int.Parse(stValue);
+                         if (stMinsec == "min") stopDescription.StoppingTime *= 60;
+                     }
+
+                     var scrapedPlatform = detailCells[3].Text().WithCollapsedSpaces();
+                     if (!string.IsNullOrEmpty(scrapedPlatform))
+                         stopDescription.Platform = PlatformRegex.Match(scrapedPlatform).Groups[1].Value;
+
+                     // Fills one arrival/departure entry; throws OperationCanceledException when the column has no parts.
+                     void ScrapeTime(IElement element, ref TrainStopArrDep arrDep) {
+                         var parts = element.QuerySelectorAll(":scope > div > div > div");
+                         if (parts.Length == 0) throw new OperationCanceledException();
+                         var time = parts[0];
+                         var scrapedTime = time.Text().WithCollapsedSpaces();
+                         var (stHour, (stMin, _)) = scrapedTime.Split(':').Select(int.Parse);
+                         // dtSeq is created once per group, so all stops share its state (presumably day rollover - confirm in Utils).
+                         arrDep.ScheduleTime = BucharestTz.AtLeniently(dtSeq.Next(stHour, stMin).ToLocalDateTime())
+                             .ToDateTimeOffset();
+
+                         if (parts.Length < 2) return;
+
+                         var statusElement = parts[1];
+                         var (onTime, (delay, (approx, _))) = (StationArrdepStatusRegex.Match(
+                             statusElement.Text().WithCollapsedSpaces(replaceWith: " ")
+                         ).Groups as IEnumerable).Skip(1).Select(g => g.Value);
+                         arrDep.MakeStatus(status => {
+                             status.Delay = string.IsNullOrEmpty(onTime) ? int.Parse(delay) : 0;
+                             status.Real = string.IsNullOrEmpty(approx);
+                         });
+                     }
+
+                     // An empty arrival or departure column surfaces as OperationCanceledException and is skipped.
+                     try {
+                         stopDescription.MakeArrival(arrival => { ScrapeTime(left, ref arrival); });
+                     }
+                     catch (OperationCanceledException) { }
+
+                     try {
+                         stopDescription.MakeDeparture(departure => { ScrapeTime(right, ref departure); });
+                     }
+                     catch (OperationCanceledException) { }
+
+                     // Each note is matched against the note regexes in order; the first success decides its kind.
+                     foreach (var noteDiv in stopNotes.QuerySelectorAll(":scope > div > div")) {
+                         var noteText = noteDiv.Text().WithCollapsedSpaces();
+                         Match trainNumberChangeMatch, departsAsMatch, detachingWagons, receivingWagons;
+                         if ((trainNumberChangeMatch = TrainNumberChangeNoteRegex.Match(noteText)).Success) {
+                             stopDescription.AddTrainNumberChangeNote(trainNumberChangeMatch.Groups[1].Value, trainNumberChangeMatch.Groups[2].Value);
+                         }
+                         else if ((departsAsMatch = DepartsAsNoteRegex.Match(noteText)).Success) {
+                             var groups = departsAsMatch.Groups;
+                             // Groups 5, 4 and 3 are passed first, second and third (presumably year, month, day - confirm), then 0, 0.
+                             var departureDate = BucharestTz.AtStrictly(new(int.Parse(groups[5].Value), int.Parse(groups[4].Value), int.Parse(groups[3].Value), 0, 0));
+                             stopDescription.AddDepartsAsNote(groups[1].Value, groups[2].Value, departureDate.ToDateTimeOffset());
+                         }
+                         else if ((detachingWagons = DetachingWagonsNoteRegex.Match(noteText)).Success) {
+                             stopDescription.AddDetachingWagonsNote(detachingWagons.Groups[1].Value);
+                         }
+                         else if ((receivingWagons = ReceivingWagonsNoteRegex.Match(noteText)).Success) {
+                             stopDescription.AddReceivingWagonsNote(receivingWagons.Groups[1].Value);
+                         }
+                     }
+                 });
+             }
+         });
+     }
+     return result;
+ }
+ }
+} // namespace