(See the newer version here: CodeReview question markdown downloader)
As an adjunct to "From new Q to compiler in 30 seconds", I've created a Python script that automatically downloads the markdown of any question on Code Review and saves it to a local file using Unix-style line endings.
For instance, to fetch the markdown for this question, one could write:
python fetchQ.py 124479 fetchquestion.md
I'm interested in a general review, including style, error handling, or anything else that could be improved.
fetchQ.py
""" Code Review question fetcher. Given the number of the question, uses
the StackExchange API version 2.2 to fetch the markdown of the question and
write it to a local file with the name given as the second argument. """
import sys
import urllib
import StringIO
import gzip
import json
import HTMLParser
def make_URL(qnumber):
    """Build the StackExchange API 2.2 request URL for one question.

    qnumber is converted with str() and inserted into the URL path; the
    query string is fixed (codereview site, activity sort, and a preset
    filter value -- presumably the one that exposes the markdown body;
    confirm against the API filter documentation).
    """
    endpoint = 'https://api.stackexchange.com/2.2/questions/'
    query = '/?order=desc&sort=activity&site=codereview&filter=!)5IYc5cM9scVj-ftqnOnMD(3TmXe'
    return ''.join((endpoint, str(qnumber), query))
def fetch_compressed_data(url):
    """Download *url* and return the gunzipped response body.

    The whole response is read into memory and then decompressed as gzip
    data.  Exceptions raised while opening, reading or decompressing are
    not caught here; they propagate to the caller.
    """
    response = urllib.urlopen(url)
    try:
        compressed = response.read()
    finally:
        # Always release the connection, even when the read fails.
        response.close()
    stream = StringIO.StringIO(compressed)
    return gzip.GzipFile(fileobj=stream).read()
def fetch_question_markdown(qnumber):
    """Return the markdown of question *qnumber* with HTML entities decoded.

    Builds the request URL with make_URL(), downloads and decompresses the
    response with fetch_compressed_data(), parses it as JSON and takes
    ``items[0].body_markdown`` from the result.

    On any failure the function terminates the program through sys.exit()
    with an "Error: ..." message; passing the message to sys.exit() writes
    it to stderr and produces a non-zero exit status, so callers and shell
    scripts can detect the failure.
    """
    url = make_URL(qnumber)
    try:
        data = fetch_compressed_data(url)
    except IOError as err:
        sys.exit("Error: {0}: while fetching data from {1}".format(err, url))
    try:
        m = json.loads(data)
    except ValueError as err:
        sys.exit("Error: {0}".format(err))
    try:
        body = m['items'][0]['body_markdown']
    except KeyError:
        # The 'items' or 'body_markdown' key is absent from the response.
        sys.exit("Error: response does not contain markdown; bad question number?")
    except IndexError:
        # 'items' is present but holds no entries.
        sys.exit("Error: item list was empty; bad question number?")
    # Decode HTML character references (e.g. &lt;) found in the body.
    return HTMLParser.HTMLParser().unescape(body)
if __name__ == '__main__':
    # Script entry point: expects a question number and an output file name.
    if len(sys.argv) != 3:
        # A string argument makes sys.exit() print to stderr and exit with
        # status 1, so a usage error is not reported as success.
        sys.exit('Usage: fetchQ questionnumber mdfilename')
    qnumber, qname = sys.argv[1:]
    md = fetch_question_markdown(qnumber)
    # Convert CRLF to LF on the text first, then encode once at the I/O
    # boundary; binary mode keeps the platform from re-translating newlines.
    with open(qname, 'wb') as f:
        f.write(md.replace('\r\n', '\n').encode('utf-8'))
Note: This code and its companion C++ project are now available in a GitHub repo.