Add itinerary scraping
This commit is contained in:
		
							parent
							
								
									1d9db5b491
								
							
						
					
					
						commit
						b66b8f79b1
					
				
					 7 changed files with 118 additions and 1 deletion
				
			
		|  | @ -187,6 +187,8 @@ public static class RouteScraper { | |||
| 					foreach (var div in leftSideDivs[2] | ||||
| 						         .QuerySelectorAll(":scope > div") | ||||
| 						         .Where((_, i) => i % 2 != 0)) { | ||||
| 						var text = div.Text().WithCollapsedSpaces(); | ||||
| 						if (text == "Nu sunt stații intermediare.") continue; | ||||
| 						train.AddIntermediateStop(div.Text().WithCollapsedSpaces()); | ||||
| 					} | ||||
|                      | ||||
|  |  | |||
							
								
								
									
										40
									
								
								server/Controllers/V3/ItinerariesController.cs
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										40
									
								
								server/Controllers/V3/ItinerariesController.cs
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,40 @@ | |||
| using System; | ||||
| using System.Collections.Generic; | ||||
| using System.Threading.Tasks; | ||||
| using Microsoft.AspNetCore.Http; | ||||
| using Microsoft.AspNetCore.Mvc; | ||||
| using scraper.Models.Itinerary; | ||||
| using Server.Services.Interfaces; | ||||
| 
 | ||||
| namespace Server.Controllers.V3;  | ||||
| 
 | ||||
/// <summary>
/// API controller (explorer group "v3", routed at <c>/v3/[controller]</c>) that
/// serves itinerary lookups by delegating to the injected <see cref="IDataManager"/>.
/// </summary>
[ApiController]
[ApiExplorerSettings(GroupName = "v3")]
[Route("/v3/[controller]")]
public class ItinerariesController : Controller {
	private IDataManager DataManager { get; }
	private IDatabase Database { get; }

	/// <summary>Stores the injected services for later use by the actions.</summary>
	public ItinerariesController(IDataManager dataManager, IDatabase database) {
		Database = database;
		DataManager = dataManager;
	}

	/// <summary>
	/// Asks the data manager for itineraries matching the query parameters.
	/// </summary>
	/// <param name="from">Value of the <c>from</c> query parameter, forwarded unchanged.</param>
	/// <param name="to">Value of the <c>to</c> query parameter, forwarded unchanged.</param>
	/// <param name="date">Optional <c>date</c> query parameter, forwarded unchanged.</param>
	/// <returns>
	/// 200 with the itineraries when the data manager returns a list,
	/// 404 when it returns <c>null</c>.
	/// </returns>
	[HttpGet("")]
	[ProducesResponseType(typeof(IEnumerable<IItinerary>), StatusCodes.Status200OK)]
	[ProducesResponseType(StatusCodes.Status404NotFound)]
	public async Task<ActionResult<IEnumerable<IItinerary>>> FindItineraries(
		[FromQuery] string from,
		[FromQuery] string to,
		[FromQuery] DateTimeOffset? date
	) {
		var found = await DataManager.FetchItineraries(from, to, date);

		if (found != null) {
			return Ok(found);
		}

		return NotFound();
	}
}
|  | @ -8,6 +8,7 @@ using Server.Services.Interfaces; | |||
| using Server.Utils; | ||||
| using InfoferScraper; | ||||
| using Microsoft.Extensions.Logging; | ||||
| using scraper.Models.Itinerary; | ||||
| 
 | ||||
| namespace Server.Services.Implementations { | ||||
| 	public class DataManager : IDataManager { | ||||
|  | @ -52,10 +53,27 @@ namespace Server.Services.Implementations { | |||
| 				} | ||||
| 				return train; | ||||
| 			}, TimeSpan.FromSeconds(30)); | ||||
| 			itinerariesCache = new(async (t) => { | ||||
| 				var (from, to, date) = t; | ||||
| 				var zonedDate = new NodaTime.LocalDate(date.Year, date.Month, date.Day).AtStartOfDayInZone(CfrTimeZone); | ||||
| 
 | ||||
| 				var itineraries = await InfoferScraper.Scrapers.RouteScraper.Scrape(from, to, zonedDate.ToDateTimeOffset()); | ||||
| 				if (itineraries != null) { | ||||
| 					_ = Task.Run(async () => { | ||||
| 						var watch = Stopwatch.StartNew(); | ||||
| 						await Database.OnItineraries(itineraries); | ||||
| 						var ms = watch.ElapsedMilliseconds; | ||||
| 						Logger.LogInformation("OnItineraries timing: {StationDataMs} ms", ms); | ||||
| 					}); | ||||
| 				} | ||||
| 
 | ||||
| 				return itineraries; | ||||
| 			}, TimeSpan.FromMinutes(1)); | ||||
| 		} | ||||
| 
 | ||||
| 		private readonly AsyncCache<(string, DateOnly), IStationScrapeResult?> stationCache; | ||||
| 		private readonly AsyncCache<(string, DateOnly), ITrainScrapeResult?> trainCache; | ||||
| 		private readonly AsyncCache<(string, string, DateOnly), IReadOnlyList<IItinerary>?> itinerariesCache; | ||||
| 
 | ||||
| 		public Task<IStationScrapeResult?> FetchStation(string stationName, DateTimeOffset date) { | ||||
| 			var cfrDateTime = new NodaTime.ZonedDateTime(NodaTime.Instant.FromDateTimeOffset(date), CfrTimeZone); | ||||
|  | @ -70,5 +88,12 @@ namespace Server.Services.Implementations { | |||
| 
 | ||||
| 			return trainCache.GetItem((trainNumber, cfrDate)); | ||||
| 		} | ||||
| 
 | ||||
/// <summary>
/// Looks up itineraries between two stations through the itineraries cache.
/// </summary>
/// <param name="from">First component of the cache key.</param>
/// <param name="to">Second component of the cache key.</param>
/// <param name="date">
/// Moment used to pick the calendar day; defaults to <see cref="DateTimeOffset.Now"/>
/// when omitted. Only its year/month/day as seen in <c>CfrTimeZone</c> reach the cache key.
/// </param>
/// <returns>Whatever the cache yields for the (from, to, day) key, possibly <c>null</c>.</returns>
public async Task<IReadOnlyList<IItinerary>?> FetchItineraries(string from, string to, DateTimeOffset? date = null) {
	// Convert the requested moment into the CFR time zone before extracting the day.
	var requestedInstant = NodaTime.Instant.FromDateTimeOffset(date ?? DateTimeOffset.Now);
	var inCfrZone = new NodaTime.ZonedDateTime(requestedInstant, CfrTimeZone);
	var cacheKey = (from, to, new DateOnly(inCfrZone.Year, inCfrZone.Month, inCfrZone.Day));

	return await itinerariesCache.GetItem(cacheKey);
}
| 	} | ||||
| } | ||||
| } | ||||
|  | @ -13,6 +13,7 @@ using Microsoft.Extensions.Options; | |||
| using MongoDB.Bson; | ||||
| using MongoDB.Bson.Serialization.Attributes; | ||||
| using MongoDB.Driver; | ||||
| using scraper.Models.Itinerary; | ||||
| using Server.Models.Database; | ||||
| using Server.Utils; | ||||
| 
 | ||||
|  | @ -339,6 +340,17 @@ public class Database : Server.Services.Interfaces.IDatabase { | |||
| 			await ProcessTrain(train); | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
/// <summary>
/// Walks every train of every itinerary and reports the train number together with
/// its stations (intermediate stops first, then origin and destination) to
/// <c>FoundTrainAtStations</c>, one train at a time.
/// </summary>
/// <param name="itineraries">Itineraries whose trains are processed in order.</param>
public async Task OnItineraries(IReadOnlyList<IItinerary> itineraries) {
    foreach (var route in itineraries) {
        foreach (var leg in route.Trains) {
            // Same station order as before: stops, then From, then To.
            var endpoints = new[] { leg.From, leg.To };
            var stations = leg.IntermediateStops.Concat(endpoints);

            await FoundTrainAtStations(stations, leg.TrainNumber);
        }
    }
}
| } | ||||
| 
 | ||||
| public record DbRecord( | ||||
|  |  | |||
|  | @ -1,11 +1,14 @@ | |||
| using System; | ||||
| using System.Collections.Generic; | ||||
| using System.Threading.Tasks; | ||||
| using InfoferScraper.Models.Train; | ||||
| using InfoferScraper.Models.Station; | ||||
| using scraper.Models.Itinerary; | ||||
| 
 | ||||
| namespace Server.Services.Interfaces; | ||||
| 
 | ||||
/// <summary>
/// Data-fetching operations exposed to the rest of the server: station results,
/// train results and itineraries. Each call is asynchronous and may yield <c>null</c>.
/// </summary>
public interface IDataManager {
	/// <summary>Fetches the scrape result for a station name at the given date.</summary>
	Task<IStationScrapeResult?> FetchStation(string stationName, DateTimeOffset date);

	/// <summary>Fetches the scrape result for a train number at the given date.</summary>
	Task<ITrainScrapeResult?> FetchTrain(string trainNumber, DateTimeOffset date);

	/// <summary>
	/// Fetches the itineraries between <paramref name="from"/> and <paramref name="to"/>;
	/// <paramref name="date"/> is optional and defaults to <c>null</c>.
	/// </summary>
	Task<IReadOnlyList<IItinerary>?> FetchItineraries(string from, string to, DateTimeOffset? date = null);
}
|  |  | |||
|  | @ -2,6 +2,7 @@ using System.Collections.Generic; | |||
| using System.Threading.Tasks; | ||||
| using InfoferScraper.Models.Train; | ||||
| using InfoferScraper.Models.Station; | ||||
| using scraper.Models.Itinerary; | ||||
| using Server.Models.Database; | ||||
| 
 | ||||
| namespace Server.Services.Interfaces; | ||||
|  | @ -15,4 +16,5 @@ public interface IDatabase { | |||
| 	public Task FoundTrainAtStation(string stationName, string trainName); | ||||
| 	public Task OnTrainData(ITrainScrapeResult trainData); | ||||
| 	public Task OnStationData(IStationScrapeResult stationData); | ||||
| 	public Task OnItineraries(IReadOnlyList<IItinerary> itineraries); | ||||
| } | ||||
|  |  | |||
							
								
								
									
										33
									
								
								server/Utils/IAsyncCusorAsyncAdapter.cs
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								server/Utils/IAsyncCusorAsyncAdapter.cs
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,33 @@ | |||
| using System.Collections.Generic; | ||||
| using System.Threading.Tasks; | ||||
| using MongoDB.Driver; | ||||
| 
 | ||||
| namespace Server.Utils;  | ||||
| 
 | ||||
/// <summary>
/// Flattens the batches of an <see cref="IAsyncCursor{T}"/> into a single
/// element-by-element enumeration exposed through <see cref="Current"/> and
/// <see cref="MoveNextAsync"/>.
/// </summary>
/// <param name="Cursor">Cursor whose batches are consumed.</param>
public record IAsyncCusorAsyncEnumerator<T>(IAsyncCursor<T> Cursor) {
    // Enumerator over the batch currently being consumed; null until the first batch arrives.
    private IEnumerator<T>? enumerator = null;

    /// <summary>
    /// Element the enumeration is positioned on. Only meaningful after
    /// <see cref="MoveNextAsync"/> has returned <c>true</c>.
    /// </summary>
    public T Current => enumerator!.Current;

    /// <summary>
    /// Advances to the next element, fetching further batches from the cursor as needed.
    /// </summary>
    /// <returns>
    /// <c>true</c> when <see cref="Current"/> is positioned on an element;
    /// <c>false</c> when the cursor reports no more batches.
    /// </returns>
    public async Task<bool> MoveNextAsync() {
        while (true) {
            // Keep draining the batch we already have.
            if (enumerator != null && enumerator.MoveNext()) return true;

            // Current batch is exhausted (or none was fetched yet): ask for another one.
            if (!await Cursor.MoveNextAsync()) return false;

            // A fresh enumerator sits BEFORE its first element, so loop back and advance it
            // instead of reporting success right away. This also skips empty batches.
            enumerator?.Dispose();
            enumerator = Cursor.Current.GetEnumerator();
        }
    }
}
| 
 | ||||
/// <summary>Extension helpers for <see cref="IAsyncCursor{T}"/>.</summary>
public static class IAsyncCursorExtensions {
    /// <summary>
    /// Wraps <paramref name="cursor"/> in a new <see cref="IAsyncCusorAsyncEnumerator{T}"/>.
    /// </summary>
    public static IAsyncCusorAsyncEnumerator<T> GetAsyncEnumerator<T>(this IAsyncCursor<T> cursor)
        => new IAsyncCusorAsyncEnumerator<T>(cursor);
}
		Loading…
	
	Add table
		
		Reference in a new issue