Fixed parsing and added support for stopping times expressed in seconds
This commit is contained in:
parent
dd565f1331
commit
ddf9c27cc3
6 changed files with 15 additions and 8 deletions
|
@ -14,7 +14,7 @@ RO_LETTERS = r'A-Za-zăâîșțĂÂÎȚȘ'
|
||||||
|
|
||||||
STATION_INFO_REGEX = re.compile(rf'^([{RO_LETTERS}. ]+) în ([0-9.]+)$')
|
STATION_INFO_REGEX = re.compile(rf'^([{RO_LETTERS}. ]+) în ([0-9.]+)$')
|
||||||
|
|
||||||
STOPPING_TIME_REGEX = re.compile(r'^(necunoscută \(stație terminus\))|(?:([0-9]+) min \((?:începând cu|până la) ([0-9]{1,2}:[0-9]{2})\))$')
|
STOPPING_TIME_REGEX = re.compile(r'^(necunoscută \(stație terminus\))|(?:([0-9]+) (min|sec) \((?:începând cu|până la) ([0-9]{1,2}:[0-9]{2})\))$')
|
||||||
|
|
||||||
# endregion
|
# endregion
|
||||||
|
|
||||||
|
@ -62,13 +62,14 @@ def scrape(station_name: str):
|
||||||
st_hr, st_min = (int(comp) for comp in result['time'].split(':'))
|
st_hr, st_min = (int(comp) for comp in result['time'].split(':'))
|
||||||
result['time'] = tz.localize(dt_seq(st_hr, st_min)).isoformat()
|
result['time'] = tz.localize(dt_seq(st_hr, st_min)).isoformat()
|
||||||
|
|
||||||
unknown_st, st, st_opposite_time = STOPPING_TIME_REGEX.match(
|
unknown_st, st, minsec, st_opposite_time = STOPPING_TIME_REGEX.match(
|
||||||
collapse_space(stopping_time_div.div('div', recursive=False)[1].text)
|
collapse_space(stopping_time_div.div('div', recursive=False)[1].text)
|
||||||
).groups()
|
).groups()
|
||||||
if unknown_st:
|
if unknown_st:
|
||||||
result['stoppingTime'] = None
|
result['stoppingTime'] = None
|
||||||
elif st:
|
elif st:
|
||||||
result['stoppingTime'] = int(st)
|
minutes = minsec == 'min'
|
||||||
|
result['stoppingTime'] = int(st) * 60 if minutes else int(st)
|
||||||
|
|
||||||
result['train'] = {}
|
result['train'] = {}
|
||||||
result['train']['rank'] = collapse_space(train_div.div.div('div', recursive=False)[1].span.text)
|
result['train']['rank'] = collapse_space(train_div.div.div('div', recursive=False)[1].span.text)
|
||||||
|
|
|
@ -53,11 +53,12 @@
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"stoppingTime": {
|
"stoppingTime": {
|
||||||
|
"description": "The number of seconds the train stops in the station",
|
||||||
"type": [
|
"type": [
|
||||||
"integer",
|
"integer",
|
||||||
"null"
|
"null"
|
||||||
],
|
],
|
||||||
"minimum": 1
|
"minimum": 0
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"required": [
|
"required": [
|
||||||
|
|
|
@ -29,7 +29,7 @@ KM_REGEX = re.compile(r'^km ([0-9]+)$')
|
||||||
|
|
||||||
PLATFORM_REGEX = re.compile(r'^linia (.+)$')
|
PLATFORM_REGEX = re.compile(r'^linia (.+)$')
|
||||||
|
|
||||||
STOPPING_TIME_REGEX = re.compile(r'^([0-9]+) min oprire$')
|
STOPPING_TIME_REGEX = re.compile(r'^([0-9]+) (min|sec) oprire$')
|
||||||
|
|
||||||
STATION_DEPARR_STATUS_REGEX = re.compile(r'^(?:(la timp)|(?:((?:\+|-)[0-9]+) min \((?:(?:întârziere)|(?:mai devreme))\)))(\*?)$')
|
STATION_DEPARR_STATUS_REGEX = re.compile(r'^(?:(la timp)|(?:((?:\+|-)[0-9]+) min \((?:(?:întârziere)|(?:mai devreme))\)))(\*?)$')
|
||||||
|
|
||||||
|
@ -106,7 +106,10 @@ def scrape(train_no: int, use_yesterday=False, date_override=None):
|
||||||
if not station_scraped['stoppingTime']:
|
if not station_scraped['stoppingTime']:
|
||||||
station_scraped['stoppingTime'] = None
|
station_scraped['stoppingTime'] = None
|
||||||
else:
|
else:
|
||||||
station_scraped['stoppingTime'] = int(STOPPING_TIME_REGEX.match(station_scraped['stoppingTime']).groups()[0])
|
st_value, st_minsec = STOPPING_TIME_REGEX.match(station_scraped['stoppingTime']).groups()
|
||||||
|
station_scraped['stoppingTime'] = int(st_value)
|
||||||
|
if st_minsec == 'min':
|
||||||
|
station_scraped['stoppingTime'] *= 60
|
||||||
station_scraped['platform'] = collapse_space(middle.div.div('div', recursive=False)[0]('div', recursive=False)[3].text)
|
station_scraped['platform'] = collapse_space(middle.div.div('div', recursive=False)[0]('div', recursive=False)[3].text)
|
||||||
if not station_scraped['platform']:
|
if not station_scraped['platform']:
|
||||||
station_scraped['platform'] = None
|
station_scraped['platform'] = None
|
||||||
|
|
|
@ -113,7 +113,7 @@
|
||||||
"stoppingTime": {
|
"stoppingTime": {
|
||||||
"description": "The number of minutes the train is scheduled to stop in this station",
|
"description": "The number of minutes the train is scheduled to stop in this station",
|
||||||
"type": ["integer", "null"],
|
"type": ["integer", "null"],
|
||||||
"minimum": 1
|
"minimum": 0
|
||||||
},
|
},
|
||||||
"platform": {
|
"platform": {
|
||||||
"description": "The platform the train stopped at",
|
"description": "The platform the train stopped at",
|
||||||
|
|
|
@ -111,7 +111,7 @@
|
||||||
"type": "integer"
|
"type": "integer"
|
||||||
},
|
},
|
||||||
"stoppingTime": {
|
"stoppingTime": {
|
||||||
"description": "The number of minutes the train is scheduled to stop in this station",
|
"description": "The number of seconds the train is scheduled to stop in this station",
|
||||||
"type": ["integer", "null"],
|
"type": ["integer", "null"],
|
||||||
"minimum": 1
|
"minimum": 1
|
||||||
},
|
},
|
||||||
|
|
|
@ -43,6 +43,8 @@ def get_train_info(train_no: int):
|
||||||
if result['stations'][i]['departure']:
|
if result['stations'][i]['departure']:
|
||||||
date = datetime.datetime.fromisoformat(result['stations'][i]['departure']['scheduleTime'])
|
date = datetime.datetime.fromisoformat(result['stations'][i]['departure']['scheduleTime'])
|
||||||
result['stations'][i]['departure']['scheduleTime'] = f'{date.hour}:{date.minute:02}'
|
result['stations'][i]['departure']['scheduleTime'] = f'{date.hour}:{date.minute:02}'
|
||||||
|
if 'stoppingTime' in result['stations'][i] and result['stations'][i]['stoppingTime']:
|
||||||
|
result['stations'][i]['stoppingTime'] //= 60
|
||||||
|
|
||||||
return result
|
return result
|
||||||
if train_no not in train_data_cache:
|
if train_no not in train_data_cache:
|
||||||
|
|
Loading…
Add table
Reference in a new issue