diff --git a/.gitignore b/.gitignore
index 136772a..ca22d60 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,3 +12,4 @@ dton-warcs
 *.pyc
 *.csv
 *.tsv
+.~lock*
diff --git a/README.md b/README.md
index 2611c1e..72c4f4e 100644
--- a/README.md
+++ b/README.md
@@ -3,8 +3,6 @@ handclassifier
 A quick-and-dirty python GUI for facilitating hand-classifying text and
 web content into arbitrary categories.
 
-This is still rudimentary and the API should not be considered stable.
-
 The basic framework is to use a tkinter gui window to present the possible
 classes for each document, with the document itself presented in another
 window:
@@ -19,7 +17,9 @@ window:
   from a MongoDB instance
 
 This code is largely by Tom Nicholls, based upon earlier work by Jonathan
-Bright.
+Bright. Some example scripts are provided, together with a related piece of
+code which classifies pairs of content against each other; the latter is
+earlier, very rough work, but may prove interesting.
 
 Copyright 2013-2015, Tom Nicholls and Jonathan Bright
 contact: tom.nicholls@oii.ox.ac.uk
diff --git a/article_list_create.py b/article_list_create.py
deleted file mode 100755
index cd66828..0000000
--- a/article_list_create.py
+++ /dev/null
@@ -1,65 +0,0 @@
-#!/usr/bin/python
-infiles = ("mail-out.txt", "sun-out.txt",
-           "bbc-out.txt", "telegraph-out.txt",
-           "mirror-out.txt", "express-out.txt",
-           "guardian-out.txt")
-
-#from a file handle get an article and associated meta info
-def get_article(handle):
-
-    #Cycle through any garbage until the next header or EOF
-    while True:
-        line = handle.readline()
-        
-        if not line:
-            return "EOF", "EOF"
-
-        if line[0:4] == "####":
-            break
-
-    #Now read the header
-    #Format: http://www.express.co.uk/news/showbiz/392805/Victoria-Beckham-celebrates-turning-39-with-a-trip-to-Nobu-with-her-children;http://www.express.co.uk/;Victoria Beckham celebrates turning 39 with a trip to Nobu with her children | Showbiz | News | Daily Express;2013-04-18T16:49:37Z;2013-04-18T16:49:37Z
-    line = handle.readline()
-    cells = line.split(";")
-
-    header = {}
-    header["Link"] = cells[0]
-    header["Site"] = cells[1]
-    header["Title"] = cells[2]
-    header["Date"] = cells[3]
-
-    #Next line should be four hashes
-    line = handle.readline()
-    if not "####" in line:
-        print "Four hash check failed with", line
-
-    #Next line is the article
-    article = handle.readline()
-
-    return article, header
-
-master_list = open("articles_list_large.csv", "w")
-print "Creating master list"
-for infile in infiles:
-
-    handle = open(infile, "r")
-
-    while(True):
-
-        article, header = get_article(handle)
-
-        if article == "EOF":
-            break
-
-        master_list.write(header["Link"])
-        master_list.write(",")
-        master_list.write(header["Title"].replace(",", ""))
-        master_list.write(",")
-        master_list.write(header["Date"])
-        master_list.write(",")
-        master_list.write(article.replace(",", ""))
-        master_list.write("\n")
-
-    handle.close()
-master_list.close()
-##print "Created"
diff --git a/darlington_classifier.py b/darlington_classifier.py
index 628e7ae..70894ca 100755
--- a/darlington_classifier.py
+++ b/darlington_classifier.py
@@ -11,8 +11,38 @@
 # This can be installed with 'pip install warctools'. Beware that there are
 # several old versions floating around under different names in the index.
 from hanzo.warctools import WarcRecord
-from warcresponseparse import *
-
+from hanzo.httptools import RequestMessage, ResponseMessage
+
+#####
+#UTILITY FUNCTIONS
+#####
+def parse_http_response(record):
+    """Parse the payload of an HTTP 'response' record, returning a tuple of
+    (code, content type or None, body); parse anomalies are only printed.
+
+    Adapted from github's internetarchive/warctools hanzo/warcfilter.py,
+    commit 1850f328e31e505569126b4739cec62ffa444223. MIT licenced."""
+    message = ResponseMessage(RequestMessage())
+    remainder = message.feed(record.content[1])  # presumably [1] is the raw HTTP payload -- confirm against WarcRecord
+    message.close()
+    if remainder or not message.complete():  # diagnostics only: nothing is raised, parsing carries on
+        if remainder:
+            print 'trailing data in http response for', record.url
+        if not message.complete():
+            print 'truncated http response for', record.url
+    header = message.header
+
+    mime_type = [v for k,v in header.headers if k.lower() == b'content-type']
+    if mime_type:
+        mime_type = mime_type[0].split(b';')[0]  # first matching header; keep only the text before the first ';'
+    else:
+        mime_type = None  # no content-type header present
+
+    return header.code, mime_type, message.get_body()
+
+#####
+#MAIN
+#####
 categories = ("1 - Information transmission",
               "2 - Electronic service delivery",
               "3 - Participation and collaboration",
diff --git a/govUK_classifier.py b/govUK_classifier.py
index d60a733..77f0836 100755
--- a/govUK_classifier.py
+++ b/govUK_classifier.py
@@ -48,11 +48,6 @@
         # the Wayback classfier as it's fetched through the Wayback index.
         # Not sending it through here as the second part of the tuple
         # saves a good deal of memory.
-        # TODO: Could make this a FilePart or similar to vastly
-        # reduce the memory load if this is a problem.
-        # TODO: Could change interface to pass the mimetype - maybe
-        # make it easier to send to an appropriate program, or to name
-        # the file correctly when it's sent to a web browser?
         content.append((row[0],None))
 
 # Shuffle content so it's not in alphabetical order for classifying
diff --git a/handclassifier/handclassifier.py b/handclassifier/handclassifier.py
index efe68bb..7c86e18 100755
--- a/handclassifier/handclassifier.py
+++ b/handclassifier/handclassifier.py
@@ -1,8 +1,6 @@
 """A quick-and-dirty python GUI for facilitating hand-classifying text and
 web content into arbitrary categories.
 
-This is still rudimentary and the API should not be considered stable.
-
 The basic framework is to use a tkinter gui window to present the possible
 classes for each document, with the document itself presented in another
 window:
diff --git a/news_classifier.py b/news_classifier.py
deleted file mode 100755
index e9d3a2d..0000000
--- a/news_classifier.py
+++ /dev/null
@@ -1,66 +0,0 @@
-#!/usr/bin/python
-# Categorise news content
-
-import Tkinter
-import handclassifier
-import datetime
-
-categories = ("CrappyCat1",
-              "MediocreCat2",
-              "CompetentCat3",
-              "MagnificentCat4")
-
-
-strFormat="%Y-%m-%dT%H:%M:%SZ"
-#start date chosen by hand though without any real reason
-start_date = datetime.strptime("2013-04-27T12:00:00Z", strFormat)
-
-#master = Tk()
-
-#Load all the articles into memory first
-print "Loading articles"
-path = ''
-articles = []
-master_list = open(path + "articles_list_large.csv", "r")
-total = 0
-
-for line in master_list:
-    cells = line.split(",")
- 
-    
-    try:
-        dt = datetime.strptime(cells[2].strip(), strFormat)
-    #some noise in this field
-    except:
-        continue
-
-    #read article into memory if it is in the window (4 hours)
-    if dt > start_date and (dt - start_date).days <= 0 and (dt - start_date).seconds <= 14400:
-        articles.append( (cells[1].strip(), cells[3].strip()) )
-
-print "There are", len(articles), "objects to classify."
-
-try:
-    output = open(path + "story_pairs.csv", "r")
-    #first check how many pairs have already been done
-    completed = 0
-    for line in output:
-        completed = completed + 1
-
-    output.close()
-    print completed, "articles already completed"
-
-    articles = articles[completed:]
-
-except:
-    print "Nothing classified yet"
-
-#Now we are ready to classify
-output = open(path + "story_pairs.csv", "a")
-
-#Initialise and run the GUI
-classifier = ManualHTMLClassifierSingle(articles, categories, output)
-Tkinter.mainloop()
-output.close()
-
-        
diff --git a/warcresponseparse.py b/warcresponseparse.py
deleted file mode 100644
index 60d041d..0000000
--- a/warcresponseparse.py
+++ /dev/null
@@ -1,33 +0,0 @@
-"""Utility functions derived from and for use with the hanzo warcutils
-library; unhelpfully not packaged as part of that library, only with
-the example scripts which accompany it"""
-
-from hanzo.httptools import RequestMessage, ResponseMessage
-
-#####
-#UTILITY FUNCTIONS
-#####
-def parse_http_response(record):
-    """Parses the payload of an HTTP 'response' record, returning code,
-    content type and body.
-
-    Adapted from github's internetarchive/warctools hanzo/warcfilter.py,
-    commit 1850f328e31e505569126b4739cec62ffa444223. MIT licenced."""
-    message = ResponseMessage(RequestMessage())
-    remainder = message.feed(record.content[1])
-    message.close()
-    if remainder or not message.complete():
-        if remainder:
-            print 'trailing data in http response for', record.url
-        if not message.complete():
-            print 'truncated http response for', record.url
-    header = message.header
-
-    mime_type = [v for k,v in header.headers if k.lower() == b'content-type']
-    if mime_type:
-        mime_type = mime_type[0].split(b';')[0]
-    else:
-        mime_type = None
-
-    return header.code, mime_type, message.get_body()
-
diff --git a/warcresponseparse.pyc b/warcresponseparse.pyc
deleted file mode 100644
index b8e1308..0000000
Binary files a/warcresponseparse.pyc and /dev/null differ