Added request entity parsers for XML and JSON (and unittests)
author Mikko Värri <vmj@linuxbox.fi>
Sat, 23 Jan 2010 22:26:40 +0000 (00:26 +0200)
committer Mikko Värri <vmj@linuxbox.fi>
Sat, 23 Jan 2010 22:26:40 +0000 (00:26 +0200)
20 files changed:
recycloid_api/parsers.py [new file with mode: 0644]
recycloid_api/tests.py
recycloid_api/tests/Empty.json [new file with mode: 0644]
recycloid_api/tests/Empty.xml [new file with mode: 0644]
recycloid_api/tests/EmptyRoot.json [new file with mode: 0644]
recycloid_api/tests/EmptyRoot.xml [new file with mode: 0644]
recycloid_api/tests/Image.json [new file with mode: 0644]
recycloid_api/tests/Image.xml [new file with mode: 0644]
recycloid_api/tests/Item.json [new file with mode: 0644]
recycloid_api/tests/Item.xml [new file with mode: 0644]
recycloid_api/tests/MultiRoot.json [new file with mode: 0644]
recycloid_api/tests/MultiRoot.xml [new file with mode: 0644]
recycloid_api/tests/Server.json [new file with mode: 0644]
recycloid_api/tests/Server.xml [new file with mode: 0644]
recycloid_api/tests/Stash.json [new file with mode: 0644]
recycloid_api/tests/Stash.xml [new file with mode: 0644]
recycloid_api/tests/StringRoot.json [new file with mode: 0644]
recycloid_api/tests/StringRoot.xml [new file with mode: 0644]
recycloid_api/tests/User.json [new file with mode: 0644]
recycloid_api/tests/User.xml [new file with mode: 0644]

diff --git a/recycloid_api/parsers.py b/recycloid_api/parsers.py
new file mode 100644 (file)
index 0000000..ceb3017
--- /dev/null
@@ -0,0 +1,171 @@
+# Luckily, xml.sax.handler.ContentHandler has been there since Python
+# 2.0, so no need for any acrobatics to read XML documents.
+from xml.sax import parseString as parse_xml
+from xml.sax import SAXParseException
+from xml.sax.handler import ContentHandler
+
+# For JSON, let's try to use system provided json module (part of
+# Python 2.6, for earlier versions it can be installed separately).
+# This module is probably the fastest available.
+try:
+    from json import loads as parse_json
+except ImportError:
+    # Failing that, let's try to import the system version of
+    # simplejson, hoping that it has the C-based speedup extension
+    # enabled.
+    try:
+        from simplejson import loads as parse_json
+    except ImportError:
+        # Well, at least Django bundles a simplejson, although without
+        # any C-code.
+        from django.utils.simplejson import loads as parse_json
+
+class JsonParser(object):
+    """JSON to Python parser.
+    """
+
+    def parse(self, data):
+        """Decode the unicode JSON text `data` and return the result.
+        Raises ValueError when nothing can be decoded or when the result
+        has more than one root entry.
+        """
+        assert(isinstance(data, unicode))
+        try:
+            o = parse_json(data)
+        except ValueError, e:
+            s = str(e)
+            if s == "No JSON object could be decoded":
+                raise ValueError("No root element found")
+            else:
+                raise NotImplementedError("BUG: parse_json raised %s who sais '%s'" % (e.__class__.__name__, str(e)))
+        except Exception, e: # anything else is unexpected; report it as a bug
+            raise NotImplementedError("BUG: parse_json raised %s who sais '%s'" % (e.__class__.__name__, str(e)))
+        else:
+            if len(o) > 1:
+                raise ValueError("Multiple root elements found")
+        return o
+
+class XmlParser(ContentHandler):
+    """XML to Python parser.
+
+    This parser produces same output from an XML document as JSON
+    parser produces from a JSON document.  I.e. a Python dictionary.
+
+    The dictionary will contain one key, the name of the root element.
+    The value is the Python representation of the root element, as
+    described below.
+
+    For each XML element, the Python representation is either None (if
+    the element was empty and without any attributes), or unicode
+    string (if the element had only text content and no attributes),
+    or a dictionary.
+
+    If it is a dictionary, any attributes are represented as key-value
+    pairs in the dictionary, where keys are the attribute names
+    preceded with "@" and values are the attribute values in unicode.
+    Any child elements are added with their name as key, and Python
+    representation as value.  If there are multiple child elements with
+    the same name, the name will point to a list of those child
+    representations.  Any text content will be added with "#text" key
+    and the unicode text as value.
+
+    Note that the order of child elements is not preserved (except in
+    case of multiple same name children, and even then only the relative
+    order of those with same name).  Also note that if the element
+    contains mixture of child elements and text, all text content is
+    lumped into one string and relative order of text and child
+    elements is not preserved.  These are non-issues for Recycloid
+    protocol.
+    """
+
+    def __init__(self):
+        ContentHandler.__init__(self)
+        # Add a root tuple to simplify the parsing
+        self.stack = [(None, {})]
+
+    def parse(self, data):
+        """Parse the unicode XML document `data` and return the root dictionary.
+        Raises ValueError if the document has no root element or more
+        than one; other parser failures raise NotImplementedError.
+        """
+        assert(isinstance(data, unicode))
+        # Fresh root tuple, so an earlier (even failed) call cannot leak in
+        self.stack = [(None, {})]
+
+        # SAX parser complains about a unicode object when the XML
+        # declaration names an encoding.  Recycloid protocol mandates
+        # UTF-8, so the declared encoding can only be UTF-8.
+        data = data.encode("UTF-8")
+
+        try:
+            parse_xml(data, self)
+        except SAXParseException, e:
+            s = str(e)
+            if s.endswith("no element found"):
+                raise ValueError("No root element found")
+            elif s.endswith("junk after document element"):
+                raise ValueError("Multiple root elements found")
+            else:
+                raise NotImplementedError("BUG: parse_xml raised %s who sais '%s'" % (e.__class__.__name__, str(e)))
+        except Exception, e:
+            raise NotImplementedError("BUG: parse_xml raised %s who sais '%s'" % (e.__class__.__name__, str(e)))
+
+        # There should be only the root tuple
+        assert(len(self.stack) == 1)
+        # Return the dictionary of the root tuple
+        return self.stack[0][1]
+
+    def startElement(self, name, attrs):
+        """SAX callback: push a (name, dict) tuple holding the attributes."""
+        # Always assume that the element contains attributes, other
+        # elements and text content.  So let's start with a dictionary
+        # representation.  We will reduce the object to simpler
+        # representation at endElement.
+        obj = {}
+        for key, value in attrs.items():
+            obj["@%s" % key] = unicode(value)
+        self.stack.append((name, obj))
+
+    def characters(self, content):
+        """SAX callback: append `content` to the "#text" of the open element."""
+        name, obj = self.stack[-1]
+        obj['#text'] = obj.get('#text', u'') + unicode(content)
+
+    def endElement(self, name):
+        """SAX callback: pop the element, simplify it, attach it to its parent."""
+        _name, obj = self.stack[-1]
+        assert(_name == name)
+        self.stack.pop()
+        assert(len(self.stack) >= 1) # There must be at least the root
+
+        # Remove the leading and trailing whitespaces, and if the
+        # resulting text content is empty, remove the content
+        # altogether.
+        if "#text" in obj:
+            obj["#text"] = obj["#text"].strip(" \t\n\r")
+            if len(obj["#text"]) == 0:
+                del obj["#text"]
+
+        # Reduce the object to simplest representation, either None
+        # (for empty dictionaries) or unicode (for text nodes).
+        if len(obj) == 0:
+            obj = None
+        elif len(obj) == 1 and "#text" in obj:
+            obj = obj["#text"]
+
+        # Add the object in its simplest form to the parent.  Since we
+        # only reduce objects here in endElement, we can be sure that
+        # parent is still a dictionary.
+        _name, parent = self.stack[-1]
+        assert(isinstance(parent, dict))
+        if name not in parent:
+            parent[name] = obj
+        else:
+            other_obj = parent[name]
+            if not isinstance(other_obj, list):
+                # None, unicode or dict; isinstance() needs type(None), not None
+                assert(isinstance(other_obj, (type(None), unicode, dict)))
+                parent[name] = [other_obj, obj]
+            else:
+                # It is already a list
+                parent[name].append(obj)
index 2247054..0d9fefa 100644 (file)
-"""
-This file demonstrates two different styles of tests (one doctest and one
-unittest). These will both pass when you run "manage.py test".
+from os.path import dirname
+from django.test import TestCase
 
-Replace these with more appropriate tests for your application.
-"""
+from recycloid_api.parsers import XmlParser, JsonParser
 
-from django.test import TestCase
+#__test__ = {"doctest": """
+#Another way to test that 1 + 1 is equal to 2.
+#
+#>>> 1 + 1 == 2
+#True
+#"""}
+
+class ParserTestCase(TestCase):
+    """Base class that loads the XML and JSON fixture pair for a test.
+
+    The fixture name is the subclass name without its "ParserTestCase"
+    suffix; the files live in the "tests" directory next to this module.
+    """
+
+    def setUp(self):
+        # Strip the 14-character "ParserTestCase" suffix to get the name
+        filename = "%s/tests/%s" % (dirname(__file__), self.__class__.__name__[:-14])
+        self.xml = self._read("%s.xml" % filename)
+        self.json = self._read("%s.json" % filename)
+
+    def _read(self, path):
+        """Return the content of the file at `path` as UTF-8 decoded unicode."""
+        f = open(path)
+        try:
+            # Recycloid data is UTF-8; the implicit ASCII decoding of
+            # unicode(str) would fail on any non-ASCII fixture.
+            return f.read().decode("UTF-8")
+        finally:
+            # Close the handle even when reading or decoding fails
+            f.close()
+
+    def cmp(self, d1, d2):
+        """Assert that both values are one-key dictionaries and are equal."""
+        for d in [d1, d2]:
+            self.assert_(isinstance(d, dict), "Not a dictionary")
+            self.assert_(len(d) == 1, "Too many root elements")
+        self.assert_(d1 == d2, "Differences in dictionaries")
+
+class EmptyParserTestCase(ParserTestCase):
+    """Both parsers must reject an empty document with ValueError."""
+
+    def test_parse(self):
+        # XML first, then JSON; the first failure ends the test
+        for parser, data in [(XmlParser(), self.xml), (JsonParser(), self.json)]:
+            try:
+                parser.parse(data)
+            except ValueError, e:
+                self.assert_(str(e).endswith('No root element found'), str(e))
+            except Exception, e:
+                # Name the unexpected exception instead of hiding it
+                self.fail('Empty document should have raised ValueError, not %s: %s' % (e.__class__.__name__, e))
+            else:
+                self.fail('Empty document should have raised ValueError')
 
-class SimpleTest(TestCase):
-    def test_basic_addition(self):
-        """
-        Tests that 1 + 1 always equals 2.
-        """
-        self.failUnlessEqual(1 + 1, 2)
+class MultiRootParserTestCase(ParserTestCase):
+    """Both parsers must reject a document with several roots."""
+
+    def test_parse(self):
+        # XML first, then JSON; the first failure ends the test
+        for parser, data in [(XmlParser(), self.xml), (JsonParser(), self.json)]:
+            try:
+                parser.parse(data)
+            except ValueError, e:
+                self.assert_(str(e).endswith('Multiple root elements found'), str(e))
+            except Exception, e:
+                # Name the unexpected exception instead of hiding it
+                self.fail('Multiple root elements should have raised ValueError, not %s: %s' % (e.__class__.__name__, e))
+            else:
+                self.fail('Multiple root elements should have raised ValueError')
 
-__test__ = {"doctest": """
-Another way to test that 1 + 1 is equal to 2.
+class EmptyRootParserTestCase(ParserTestCase):
+    """The EmptyRoot fixtures must parse to equal one-key dictionaries."""
+
+    def test_parse(self):
+        # Swap each raw fixture for its parsed form, then compare them
+        self.xml, self.json = XmlParser().parse(self.xml), JsonParser().parse(self.json)
+        self.cmp(self.xml, self.json)
 
->>> 1 + 1 == 2
-True
-"""}
+class StringRootParserTestCase(ParserTestCase):
+    """The StringRoot fixtures must parse to equal one-key dictionaries."""
+
+    def test_parse(self):
+        # Swap each raw fixture for its parsed form, then compare them
+        self.xml, self.json = XmlParser().parse(self.xml), JsonParser().parse(self.json)
+        self.cmp(self.xml, self.json)
 
+class ServerParserTestCase(ParserTestCase):
+    """The Server fixtures must parse to equal one-key dictionaries."""
+
+    def test_parse(self):
+        # Swap each raw fixture for its parsed form, then compare them
+        self.xml, self.json = XmlParser().parse(self.xml), JsonParser().parse(self.json)
+        self.cmp(self.xml, self.json)
+class UserParserTestCase(ParserTestCase):
+    """The User fixtures must parse to equal one-key dictionaries."""
+
+    def test_parse(self):
+        # Swap each raw fixture for its parsed form, then compare them
+        self.xml, self.json = XmlParser().parse(self.xml), JsonParser().parse(self.json)
+        self.cmp(self.xml, self.json)
+class StashParserTestCase(ParserTestCase):
+    """The Stash fixtures must parse to equal one-key dictionaries."""
+
+    def test_parse(self):
+        # Swap each raw fixture for its parsed form, then compare them
+        self.xml, self.json = XmlParser().parse(self.xml), JsonParser().parse(self.json)
+        self.cmp(self.xml, self.json)
+class ItemParserTestCase(ParserTestCase):
+    """The Item fixtures must parse to equal one-key dictionaries."""
+
+    def test_parse(self):
+        # Swap each raw fixture for its parsed form, then compare them
+        self.xml, self.json = XmlParser().parse(self.xml), JsonParser().parse(self.json)
+        self.cmp(self.xml, self.json)
+class ImageParserTestCase(ParserTestCase):
+    """The Image fixtures must parse to equal one-key dictionaries."""
+
+    def test_parse(self):
+        # Swap each raw fixture for its parsed form, then compare them
+        self.xml, self.json = XmlParser().parse(self.xml), JsonParser().parse(self.json)
+        self.cmp(self.xml, self.json)
diff --git a/recycloid_api/tests/Empty.json b/recycloid_api/tests/Empty.json
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/recycloid_api/tests/Empty.xml b/recycloid_api/tests/Empty.xml
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/recycloid_api/tests/EmptyRoot.json b/recycloid_api/tests/EmptyRoot.json
new file mode 100644 (file)
index 0000000..5705b6d
--- /dev/null
@@ -0,0 +1,3 @@
+{
+  "response": null
+}
diff --git a/recycloid_api/tests/EmptyRoot.xml b/recycloid_api/tests/EmptyRoot.xml
new file mode 100644 (file)
index 0000000..891eba9
--- /dev/null
@@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<response/>
diff --git a/recycloid_api/tests/Image.json b/recycloid_api/tests/Image.json
new file mode 100644 (file)
index 0000000..3534d35
--- /dev/null
@@ -0,0 +1,6 @@
+{ "image":
+  { "@uuid": "12345678-1234-1234-1234-123456789012",
+    "@item": "12345678-1234-1234-1234-123456789012",
+    "url": "https://api.recycloid.org/"
+  }
+}
diff --git a/recycloid_api/tests/Image.xml b/recycloid_api/tests/Image.xml
new file mode 100644 (file)
index 0000000..a5a8d05
--- /dev/null
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<image uuid="12345678-1234-1234-1234-123456789012" item="12345678-1234-1234-1234-123456789012">
+  <url>https://api.recycloid.org/</url>
+</image>
diff --git a/recycloid_api/tests/Item.json b/recycloid_api/tests/Item.json
new file mode 100644 (file)
index 0000000..7f4c05c
--- /dev/null
@@ -0,0 +1,11 @@
+{ "item":
+  { "@uuid": "12345678-1234-1234-1234-123456789012",
+    "@stash": "12345678-1234-1234-1234-123456789012",
+    "title": "Something about me.",
+    "description": "Something about me.",
+    "image": "https://api.recycloid.org/",
+    "expires": "01012010T01:01:01Z",
+    "modified": "01012010T01:01:01Z",
+    "created": "01012010T01:01:01Z"
+  }
+}
diff --git a/recycloid_api/tests/Item.xml b/recycloid_api/tests/Item.xml
new file mode 100644 (file)
index 0000000..148cfa1
--- /dev/null
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<item uuid="12345678-1234-1234-1234-123456789012" stash="12345678-1234-1234-1234-123456789012">
+  <title>Something about me.</title>
+  <description>Something about me.</description>
+  <image>https://api.recycloid.org/</image>
+  <expires>01012010T01:01:01Z</expires>
+  <modified>01012010T01:01:01Z</modified>
+  <created>01012010T01:01:01Z</created>
+</item>
diff --git a/recycloid_api/tests/MultiRoot.json b/recycloid_api/tests/MultiRoot.json
new file mode 100644 (file)
index 0000000..2a9aa7f
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "firstroot": null,
+  "secondroot": null
+}
diff --git a/recycloid_api/tests/MultiRoot.xml b/recycloid_api/tests/MultiRoot.xml
new file mode 100644 (file)
index 0000000..c9c5b74
--- /dev/null
@@ -0,0 +1,3 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<firstroot/>
+<secondroot/>
diff --git a/recycloid_api/tests/Server.json b/recycloid_api/tests/Server.json
new file mode 100644 (file)
index 0000000..d0950f3
--- /dev/null
@@ -0,0 +1,5 @@
+{ "server":
+  { "@uuid": "12345678-1234-1234-1234-123456789012",
+    "url": "https://api.recycloid.org/"
+  }
+}
diff --git a/recycloid_api/tests/Server.xml b/recycloid_api/tests/Server.xml
new file mode 100644 (file)
index 0000000..0e06289
--- /dev/null
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<server uuid="12345678-1234-1234-1234-123456789012">
+  <url>https://api.recycloid.org/</url>
+</server>
diff --git a/recycloid_api/tests/Stash.json b/recycloid_api/tests/Stash.json
new file mode 100644 (file)
index 0000000..93d45f7
--- /dev/null
@@ -0,0 +1,13 @@
+{ "stash":
+  { "@uuid": "12345678-1234-1234-1234-123456789012",
+    "@server": "12345678-1234-1234-1234-123456789012",
+    "@user": "12345678-1234-1234-1234-123456789012",
+    "title": "Something about me.",
+    "description": "Something about me.",
+    "image": "https://api.recycloid.org/",
+    "location":
+        { "latitude": "123.123",
+          "longitude": "123.123"
+        }
+  }
+}
diff --git a/recycloid_api/tests/Stash.xml b/recycloid_api/tests/Stash.xml
new file mode 100644 (file)
index 0000000..414bc92
--- /dev/null
@@ -0,0 +1,12 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<stash uuid="12345678-1234-1234-1234-123456789012"
+       server="12345678-1234-1234-1234-123456789012"
+       user="12345678-1234-1234-1234-123456789012">
+  <title>Something about me.</title>
+  <description>Something about me.</description>
+  <image>https://api.recycloid.org/</image>
+  <location>
+    <latitude>123.123</latitude>
+    <longitude>123.123</longitude>
+  </location>
+</stash>
diff --git a/recycloid_api/tests/StringRoot.json b/recycloid_api/tests/StringRoot.json
new file mode 100644 (file)
index 0000000..986745d
--- /dev/null
@@ -0,0 +1,3 @@
+{
+  "response": "Something about me."
+}
diff --git a/recycloid_api/tests/StringRoot.xml b/recycloid_api/tests/StringRoot.xml
new file mode 100644 (file)
index 0000000..ae13b5b
--- /dev/null
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<response>
+  Something about me.
+</response>
diff --git a/recycloid_api/tests/User.json b/recycloid_api/tests/User.json
new file mode 100644 (file)
index 0000000..8d82f7b
--- /dev/null
@@ -0,0 +1,7 @@
+{ "user":
+  { "@uuid": "12345678-1234-1234-1234-123456789012",
+    "@server": "12345678-1234-1234-1234-123456789012",
+    "description": "Something about me.",
+    "image": "https://api.recycloid.org/"
+  }
+}
diff --git a/recycloid_api/tests/User.xml b/recycloid_api/tests/User.xml
new file mode 100644 (file)
index 0000000..0728f22
--- /dev/null
@@ -0,0 +1,5 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<user uuid="12345678-1234-1234-1234-123456789012" server="12345678-1234-1234-1234-123456789012">
+  <description>Something about me.</description>
+  <image>https://api.recycloid.org/</image>
+</user>