Add new status line info and replacement bus info

This commit is contained in:
Kenneth Bruen 2026-06-24 04:54:47 +02:00
parent 988df9652a
commit e4e52e711d
Signed by: kbruen
GPG key ID: C1980A470C3EE5B1
2 changed files with 112 additions and 18 deletions

40
scraper/src/Models/Train.cs Normal file → Executable file
View file

@ -40,6 +40,11 @@ namespace InfoferScraper.Models.Train {
public int Delay { get; } public int Delay { get; }
public string Station { get; } public string Station { get; }
public StatusKind State { get; } public StatusKind State { get; }
/// <summary>
/// The time at which the real-time report was entered into the system,
/// or <c>null</c> when no report time is known.
/// </summary>
public DateTimeOffset? ReportTime { get; }
/// <summary>
/// The route segment associated with this status, or <c>null</c> when none was reported.
/// NOTE(review): presumably the pair of stations the train is currently between — confirm.
/// </summary>
public ITrainRoute? Between { get; }
} }
public interface ITrainStopDescription { public interface ITrainStopDescription {
@ -102,6 +107,9 @@ namespace InfoferScraper.Models.Train {
DetachingWagons, DetachingWagons,
ReceivingWagons, ReceivingWagons,
DepartsAs, DepartsAs,
/// <summary>
/// A bus transfer starts at this stop.
/// </summary>
BusReplacementStartingHere,
/// <summary>
/// A bus transfer applies at this stop.
/// </summary>
BusReplacement,
/// <summary>
/// A bus transfer ends at this stop.
/// </summary>
BusReplacementEndingHere,
} }
#region Implementations #region Implementations
@ -166,6 +174,14 @@ namespace InfoferScraper.Models.Train {
public int Delay { get; set; } public int Delay { get; set; }
public string Station { get; set; } = ""; public string Station { get; set; } = "";
public StatusKind State { get; set; } public StatusKind State { get; set; }
/// <summary>
/// Time at which the real-time report was recorded, when one is known.
/// </summary>
public DateTimeOffset? ReportTime { get; set; }

/// <summary>
/// Route segment attached to this status, or <c>null</c> if none was set.
/// </summary>
public ITrainRoute? Between { get; set; }

/// <summary>
/// Creates a fresh <see cref="TrainRoute"/>, lets <paramref name="configurator"/>
/// populate it, and stores the result in <see cref="Between"/>.
/// </summary>
/// <param name="configurator">Callback that fills in the newly created route.</param>
internal void MakeBetween(Action<TrainRoute> configurator) {
    var route = new TrainRoute();
    configurator.Invoke(route);
    Between = route;
}
} }
internal record TrainStopDescription : ITrainStopDescription { internal record TrainStopDescription : ITrainStopDescription {
@ -214,6 +230,18 @@ namespace InfoferScraper.Models.Train {
public string Station { get; set; } = ""; public string Station { get; set; } = "";
} }
/// <summary>
/// Stop note whose kind is <see cref="NoteKind.BusReplacementStartingHere"/>.
/// </summary>
class BusReplacementStartingHereNote : ITrainStopNote {
    public NoteKind Kind {
        get { return NoteKind.BusReplacementStartingHere; }
    }
}
/// <summary>
/// Stop note whose kind is <see cref="NoteKind.BusReplacement"/>.
/// </summary>
class BusReplacementNote : ITrainStopNote {
    public NoteKind Kind {
        get { return NoteKind.BusReplacement; }
    }
}
/// <summary>
/// Stop note whose kind is <see cref="NoteKind.BusReplacementEndingHere"/>.
/// </summary>
class BusReplacementEndingHereNote : ITrainStopNote {
    public NoteKind Kind {
        get { return NoteKind.BusReplacementEndingHere; }
    }
}
internal void AddDepartsAsNote(string rank, string number, DateTimeOffset departureDate) { internal void AddDepartsAsNote(string rank, string number, DateTimeOffset departureDate) {
ModifyableNotes.Add(new DepartsAsNote { Rank = rank, Number = number, DepartureDate = departureDate }); ModifyableNotes.Add(new DepartsAsNote { Rank = rank, Number = number, DepartureDate = departureDate });
} }
@ -229,6 +257,18 @@ namespace InfoferScraper.Models.Train {
internal void AddDetachingWagonsNote(string station) { internal void AddDetachingWagonsNote(string station) {
ModifyableNotes.Add(new DetachingWagonsNote { Station = station }); ModifyableNotes.Add(new DetachingWagonsNote { Station = station });
} }
/// <summary>
/// Appends a <see cref="BusReplacementStartingHereNote"/> to this stop's notes.
/// </summary>
internal void AddBusReplacementStartingHereNote() =>
    ModifyableNotes.Add(new BusReplacementStartingHereNote());
/// <summary>
/// Appends a <see cref="BusReplacementNote"/> to this stop's notes.
/// </summary>
internal void AddBusReplacementNote() =>
    ModifyableNotes.Add(new BusReplacementNote());
/// <summary>
/// Appends a <see cref="BusReplacementEndingHereNote"/> to this stop's notes.
/// </summary>
internal void AddBusReplacementEndingHereNote() =>
    ModifyableNotes.Add(new BusReplacementEndingHereNote());
} }
public record TrainStopArrDep : ITrainStopArrDep { public record TrainStopArrDep : ITrainStopArrDep {

90
scraper/src/Scrapers/Train.cs Normal file → Executable file
View file

@ -1,4 +1,4 @@
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.Linq; using System.Linq;
using System.Net; using System.Net;
@ -28,6 +28,10 @@ namespace InfoferScraper.Scrapers {
new( new(
@"^(?:Fără|([0-9]+)\smin)\s(întârziere|mai\sdevreme)\sla\s(trecerea\sfără\soprire\sprin|sosirea\sîn|plecarea\sdin)\s(.+)(?:\s\(Raportat\sla\s([0-9]+):([0-9]+)\))?\."); @"^(?:Fără|([0-9]+)\smin)\s(întârziere|mai\sdevreme)\sla\s(trecerea\sfără\soprire\sprin|sosirea\sîn|plecarea\sdin)\s(.+)(?:\s\(Raportat\sla\s([0-9]+):([0-9]+)\))?\.");
// Matches the extended status line: the plain delay/station sentence followed by the
// "train is between stations A - B" sentence and the map-button hint.
// Capture groups: 1 = delay in minutes (empty for "Fără"), 2 = late/early wording,
// 3 = event wording (passing / arrival / departure), 4 = station name,
// 5 and 6 = optional report hour and minute, 7 and 8 = the "between" station pair.
//
// The station group is lazy on purpose: a greedy (.+) placed before the optional
// "(Raportat la HH:MM)" group swallows that text into the station name, leaving
// groups 5 and 6 permanently empty. The literal ". Conform itinerariului" that follows
// keeps the lazy match from stopping at a dot inside a station name.
private static readonly Regex SlExpandedRegex =
    new(
        @"^(?:Fără|([0-9]+)\smin)\s(întârziere|mai\sdevreme)\sla\s(trecerea\sfără\soprire\sprin|sosirea\sîn|plecarea\sdin)\s(.+?)(?:\s\(Raportat\sla\s([0-9]+):([0-9]+)\))?\.\sConform\sitinerariului,\strenul\sse\saflă\sîntre\sstațiile\s(.+)\s-\s(.+)\.\sPuteți\sapăsa\spe\sbutonul\s”Hartă”\spentru\sa\svedea\slocația\.");
private static readonly Dictionary<char, StatusKind> SlStateMap = new() { private static readonly Dictionary<char, StatusKind> SlStateMap = new() {
{ 't', StatusKind.Passing }, { 't', StatusKind.Passing },
{ 's', StatusKind.Arrival }, { 's', StatusKind.Arrival },
@ -49,6 +53,12 @@ namespace InfoferScraper.Scrapers {
new(@"^Trenul primește vagoane de la\s(.+)\.$"); new(@"^Trenul primește vagoane de la\s(.+)\.$");
private static readonly Regex DetachingWagonsNoteRegex = private static readonly Regex DetachingWagonsNoteRegex =
new(@"^Trenul detașează vagoane pentru stația\s(.+)\.$"); new(@"^Trenul detașează vagoane pentru stația\s(.+)\.$");
// Stop notes describing a bus transfer. All three patterns are fully anchored and
// tolerate surrounding whitespace, so the shorter "Transfer cu autobuzul" pattern
// cannot match the longer "starting here" / "ending here" sentences.

// "Bus transfer starting from this station"
private static readonly Regex BusReplacementStartingHereNoteRegex =
    new(@"^\s*Transfer\scu\sautobuzul\sîncepând\scu\saceastă\sstație\s*$");
// "Bus transfer" — uses the same \s separators and \s* padding as its siblings
// (previously literal spaces with no padding allowed).
private static readonly Regex BusReplacementNoteRegex =
    new(@"^\s*Transfer\scu\sautobuzul\s*$");
// "Bus transfer up to this station"
private static readonly Regex BusReplacementEndingHereNoteRegex =
    new(@"^\s*Transfer\scu\sautobuzul\spână\sla\saceastă\sstație\s*$");
private static readonly DateTimeZone BucharestTz = DateTimeZoneProviders.Tzdb["Europe/Bucharest"]; private static readonly DateTimeZone BucharestTz = DateTimeZoneProviders.Tzdb["Europe/Bucharest"];
@ -144,22 +154,6 @@ namespace InfoferScraper.Scrapers {
.Select(group => group.Value); .Select(group => group.Value);
}); });
try {
var statusLineMatch =
SlRegex.Match(statusDiv.QuerySelector(":scope > div")!.Text().WithCollapsedSpaces());
var (slmDelay, (slmLate, (slmArrival, (slmStation, _)))) =
(statusLineMatch.Groups as IEnumerable<Group>).Skip(1).Select(group => group.Value);
group.MakeStatus(status => {
status.Delay = string.IsNullOrEmpty(slmDelay) ? 0 :
slmLate == "întârziere" ? int.Parse(slmDelay) : -int.Parse(slmDelay);
status.Station = slmStation;
status.State = SlStateMap[slmArrival[0]];
});
}
catch {
// ignored
}
Utils.DateTimeSequencer dtSeq = new(date.Year, date.Month, date.Day); Utils.DateTimeSequencer dtSeq = new(date.Year, date.Month, date.Day);
var stations = statusDiv.QuerySelectorAll(":scope > ul > li"); var stations = statusDiv.QuerySelectorAll(":scope > ul > li");
foreach (var station in stations) { foreach (var station in stations) {
@ -242,7 +236,7 @@ namespace InfoferScraper.Scrapers {
foreach (var noteDiv in stopNotes.QuerySelectorAll(":scope > div > div")) { foreach (var noteDiv in stopNotes.QuerySelectorAll(":scope > div > div")) {
var noteText = noteDiv.Text().WithCollapsedSpaces(); var noteText = noteDiv.Text().WithCollapsedSpaces();
Match trainNumberChangeMatch, departsAsMatch, detachingWagons, receivingWagons; Match trainNumberChangeMatch, departsAsMatch, detachingWagons, receivingWagons, busReplacementStart, busReplacement, busReplacementEnd;
if ((trainNumberChangeMatch = TrainNumberChangeNoteRegex.Match(noteText)).Success) { if ((trainNumberChangeMatch = TrainNumberChangeNoteRegex.Match(noteText)).Success) {
stopDescription.AddTrainNumberChangeNote(trainNumberChangeMatch.Groups[1].Value, trainNumberChangeMatch.Groups[2].Value); stopDescription.AddTrainNumberChangeNote(trainNumberChangeMatch.Groups[1].Value, trainNumberChangeMatch.Groups[2].Value);
} }
@ -257,9 +251,69 @@ namespace InfoferScraper.Scrapers {
else if ((receivingWagons = ReceivingWagonsNoteRegex.Match(noteText)).Success) { else if ((receivingWagons = ReceivingWagonsNoteRegex.Match(noteText)).Success) {
stopDescription.AddReceivingWagonsNote(receivingWagons.Groups[1].Value); stopDescription.AddReceivingWagonsNote(receivingWagons.Groups[1].Value);
} }
// Bus transfer notes carry no captured data; only the kind of note is recorded.
// Note text says the bus transfer starts at this station.
else if ((busReplacementStart = BusReplacementStartingHereNoteRegex.Match(noteText)).Success) {
stopDescription.AddBusReplacementStartingHereNote();
}
// Note text is the bare "bus transfer" sentence.
else if ((busReplacement = BusReplacementNoteRegex.Match(noteText)).Success) {
stopDescription.AddBusReplacementNote();
}
// Note text says the bus transfer runs up to this station.
else if ((busReplacementEnd = BusReplacementEndingHereNoteRegex.Match(noteText)).Success) {
stopDescription.AddBusReplacementEndingHereNote();
}
} }
}); });
} }
// Parse the status line (the first direct child div of statusDiv) into the group's status.
// The report-time computation below reads group.Stations[0], so this presumably has to run
// after the stations have been added to the group — confirm before moving this block.
try {
var statusLineText = statusDiv.QuerySelector(":scope > div")!.Text().WithCollapsedSpaces();
var statusLineExpandedMatch = SlExpandedRegex.Match(statusLineText);
var statusLineMatch = SlRegex.Match(statusLineText);
// Prefer the expanded form (which also carries the "between stations" pair);
// fall back to the plain form; null means the status line was not recognised.
var realMatch = statusLineExpandedMatch.Success ? statusLineExpandedMatch
: statusLineMatch.Success ? statusLineMatch
: null;
if (realMatch != null) {
// Capture groups in order: delay, late/early wording, event wording, station,
// report hour, report minute, between-from, between-to.
// NOTE(review): SlRegex has only six groups, so the last two names rely on the project's
// IEnumerable deconstruction helper tolerating a shorter sequence — confirm; if it throws,
// the catch below silently drops the whole status.
var (slmDelay, (slmLate, (slmArrival, (slmStation,
(slmReportH, (slmReportM, (slmBetweenF, (slmBetweenT, _)))))))) =
(realMatch.Groups as IEnumerable<Group>).Skip(1).Select(group => group.Value);
group.MakeStatus(status => {
// No delay group ("Fără") means 0; "întârziere" is a positive delay, anything else is negated.
status.Delay = string.IsNullOrEmpty(slmDelay) ? 0 :
slmLate == "întârziere" ? int.Parse(slmDelay) : -int.Parse(slmDelay);
status.Station = slmStation;
// The first character of the event wording is the key into SlStateMap.
status.State = SlStateMap[slmArrival[0]];
if (!string.IsNullOrEmpty(slmReportH) && !string.IsNullOrEmpty(slmReportM)) {
// The status line only gives hour and minute, so the date is taken from the
// first station's scheduled departure and interpreted in the Bucharest time zone.
var firstDeparture = group.Stations[0].Departure!.ScheduleTime;
var potentialReportTime = BucharestTz
.AtLeniently(
new DateTime(firstDeparture.Year, firstDeparture.Month, firstDeparture.Day, int.Parse(slmReportH), int.Parse(slmReportM), 0)
.ToLocalDateTime()
)
.ToDateTimeOffset();
if (potentialReportTime < firstDeparture) {
// Assume no reports come in before the train departs
// so a report time earlier than the first departure is moved to the following day.
var nextDay = firstDeparture.AddDays(1);
potentialReportTime = BucharestTz
.AtLeniently(
new DateTime(nextDay.Year, nextDay.Month, nextDay.Day, potentialReportTime.Hour, potentialReportTime.Minute, 0)
.ToLocalDateTime()
)
.ToDateTimeOffset();
}
status.ReportTime = potentialReportTime;
}
// Only the expanded status line provides the station pair.
if (!string.IsNullOrEmpty(slmBetweenF) && !string.IsNullOrEmpty(slmBetweenT)) {
status.MakeBetween(between => {
between.From = slmBetweenF;
between.To = slmBetweenT;
});
}
});
}
}
catch {
// ignored
// Best effort: every exception raised above (missing element, parse failure, missing
// map key, ...) is swallowed here.
// NOTE(review): consider logging, since failures are otherwise invisible.
}
}); });
} }
return result; return result;