dublinbus-display/refresh_feed.py

114 lines
4.1 KiB
Python

# Refresh the feed file from the original source
# Only download the file if the source is newer than the local copy
# This code was adapted from https://forums.raspberrypi.com/viewtopic.php?t=152226#p998268
import email.utils
import os
import sys
import time
import requests
import urllib3
# First we construct a handful of functions - testing happens down at the end
def httpdate_to_ts(dt):
    """Convert an HTTP date string into a POSIX timestamp.

    Returns 0 when the string cannot be parsed as a date.
    """
    parsed = email.utils.parsedate_tz(dt)
    if parsed is not None:
        return email.utils.mktime_tz(parsed)
    return 0
def ts_to_httpdate(ts):
    """Render a POSIX timestamp as an HTTP date string in GMT."""
    http_date = email.utils.formatdate(ts, localtime=False, usegmt=True)
    return http_date
# v1: download remote file if HTTP's Last-Modified header indicates that
# the file has been updated. This requires the remote server to support
# sending the Last-Modified header.
#
def update_local_file_from_url_v1(last_mtime, local_file, url):
    """Download ``url`` to ``local_file`` when the remote copy is newer.

    A HEAD request is issued first. The body is fetched only when
    ``last_mtime`` is falsy or the remote ``Last-Modified`` timestamp is
    greater than ``int(last_mtime)``. After a successful download the local
    file's access and modification times are set to the remote timestamp.

    Args:
        last_mtime: Timestamp of the copy already held; any falsy value
            forces a download.
        local_file: Path the downloaded content is written to.
        url: Location of the remote file.

    Returns:
        An ``(updated, mtime)`` tuple:
        ``(False, last_mtime)`` if the HEAD request fails, the server sends
        no ``Last-Modified`` header, or the GET request fails;
        ``(False, remote_mtime)`` if the local copy is already current;
        ``(True, remote_mtime)`` after a successful download.
    """
    # Check the status of the remote file without downloading it
    r1 = requests.head(url)
    if r1.status_code != requests.codes.ok:
        # http request failed
        print('HEY! get for {} returned {}'.format(url, r1.status_code),
              file=sys.stderr)
        return False, last_mtime

    # Get the modification time for the file, if possible
    if 'Last-Modified' not in r1.headers:
        print('HEY! no Last-Modified header for {}'.format(url),
              file=sys.stderr)
        return False, last_mtime
    mtime = httpdate_to_ts(r1.headers['Last-Modified'])

    print('Comparing feed mtimes: feed: {} vs remote {}'.format(
        str(last_mtime), str(mtime)), file=sys.stderr)

    # Nothing to do unless the remote file is newer than the one we last saw
    if last_mtime and mtime <= int(last_mtime):
        print('No need to refresh feed.', file=sys.stderr)
        return False, mtime

    print('Refreshing feed..', file=sys.stderr)
    # Stream the new content rather than loading it all into memory
    conn = urllib3.connection_from_url(url)
    r2 = conn.request(method="GET", url=url, preload_content=False)
    try:
        # urllib3 responses expose the HTTP code as .status (not .status_code)
        if r2.status != 200:
            # http request failed
            print('HEY! get for {} returned {}'.format(url, r2.status),
                  file=sys.stderr)
            return False, last_mtime

        # write new content to local file
        with open(local_file, 'wb') as f:
            for chunk in r2.stream(amt=65536, decode_content=True):
                f.write(chunk)
    finally:
        # Always hand the connection back, even if streaming/writing failed;
        # a failure to release is reported but never masks the real outcome.
        try:
            r2.release_conn()
        except Exception as e:
            print('Could not release connection to {}: {}'.format(url, str(e)),
                  file=sys.stderr)

    # Change the mtime of the file so it mirrors the remote timestamp
    os.utime(local_file, (mtime, mtime))
    print('Downloaded {}.'.format(local_file), file=sys.stderr)
    return True, mtime
# v2: download remote file conditionally, with HTTP's If-Modified-Since header.
# This requires the remote server to support both sending the Last-Modified
# header and receiving the If-Modified-Since header.
#
def update_local_file_from_url_v2(last_mtime, local_file, url):
    """Conditionally download ``url`` to ``local_file`` via If-Modified-Since.

    Issues a single GET request carrying an ``If-Modified-Since`` header
    built from ``last_mtime``. On a 200 response the body is handed to
    ``write_file_with_time`` together with the timestamp taken from the
    response's ``Last-Modified`` header (or ``last_mtime`` when the server
    sent none).

    Args:
        last_mtime: Timestamp of the copy already held. When falsy, no
            ``If-Modified-Since`` header is sent so the file is fetched
            unconditionally.
        local_file: Path passed to ``write_file_with_time``.
        url: Location of the remote file.

    Returns:
        An ``(updated, mtime)`` tuple. ``updated`` is True only for a 200
        response; ``mtime`` is the parsed ``Last-Modified`` value when one
        was received with that response, otherwise ``last_mtime``.
    """
    # formatdate() substitutes the current time for a missing timestamp, which
    # would normally make the server answer "not modified" and never send the
    # file; only make the request conditional when we have a real timestamp.
    headers = {}
    if last_mtime:
        headers['If-Modified-Since'] = ts_to_httpdate(last_mtime)

    # Get the remote file, but only if it has changed
    r = requests.get(url, headers=headers)
    updated, mtime = False, last_mtime

    if r.status_code == requests.codes.ok:
        # File is updated and we just downloaded the content
        updated = True

        # Update our notion of the file's last modification time *before*
        # writing, so the fresh timestamp (not the stale one) is passed along.
        if 'Last-Modified' in r.headers:
            mtime = httpdate_to_ts(r.headers['Last-Modified'])
        else:
            print('HEY! no Last-Modified header for {}'.format(url),
                  file=sys.stderr)

        # write new content to local file
        # NOTE(review): write_file_with_time is defined outside this block;
        # presumably it stamps the file with the given time - confirm.
        write_file_with_time(local_file, r.content, mtime)
    elif r.status_code == requests.codes.not_modified:
        # Successful call, but no updates to file
        print('As of {}, server says {} is the same'.format(time.ctime(), url))
    else:
        # http request failed
        print('HEY! get for {} returned {}'.format(url, r.status_code),
              file=sys.stderr)

    return updated, mtime