{"sha":"83fe31178c6b9fcdef773f520822689b8fe4544f","node_id":"MDY6Q29tbWl0MjA5NDkzMDc6ODNmZTMxMTc4YzZiOWZjZGVmNzczZjUyMDgyMjY4OWI4ZmU0NTQ0Zg==","commit":{"author":{"name":"palewire","email":"ben.welsh@gmail.com","date":"2014-07-20T16:52:26Z"},"committer":{"name":"palewire","email":"ben.welsh@gmail.com","date":"2014-07-20T16:52:26Z"},"message":"Smoothed out the tests","tree":{"sha":"db834eb611d4710cc91462fa7bb642cb31633c9d","url":"https://api.github.com/repos/palewire/storytracker/git/trees/db834eb611d4710cc91462fa7bb642cb31633c9d"},"url":"https://api.github.com/repos/palewire/storytracker/git/commits/83fe31178c6b9fcdef773f520822689b8fe4544f","comment_count":0,"verification":{"verified":false,"reason":"unsigned","signature":null,"payload":null}},"url":"https://api.github.com/repos/palewire/storytracker/commits/83fe31178c6b9fcdef773f520822689b8fe4544f","html_url":"https://github.com/palewire/storytracker/commit/83fe31178c6b9fcdef773f520822689b8fe4544f","comments_url":"https://api.github.com/repos/palewire/storytracker/commits/83fe31178c6b9fcdef773f520822689b8fe4544f/comments","author":{"login":"palewire","id":9993,"node_id":"MDQ6VXNlcjk5OTM=","avatar_url":"https://avatars.githubusercontent.com/u/9993?v=4","gravatar_id":"","url":"https://api.github.com/users/palewire","html_url":"https://github.com/palewire","followers_url":"https://api.github.com/users/palewire/followers","following_url":"https://api.github.com/users/palewire/following{/other_user}","gists_url":"https://api.github.com/users/palewire/gists{/gist_id}","starred_url":"https://api.github.com/users/palewire/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/palewire/subscriptions","organizations_url":"https://api.github.com/users/palewire/orgs","repos_url":"https://api.github.com/users/palewire/repos","events_url":"https://api.github.com/users/palewire/events{/privacy}","received_events_url":"https://api.github.com/users/palewire/received_events","type":"User","site_admin":false},"committer":{"login":"palewire","id":9993,"node_id":"MDQ6VXNlcjk5OTM=","avatar_url":"https://avatars.githubusercontent.com/u/9993?v=4","gravatar_id":"","url":"https://api.github.com/users/palewire","html_url":"https://github.com/palewire","followers_url":"https://api.github.com/users/palewire/followers","following_url":"https://api.github.com/users/palewire/following{/other_user}","gists_url":"https://api.github.com/users/palewire/gists{/gist_id}","starred_url":"https://api.github.com/users/palewire/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/palewire/subscriptions","organizations_url":"https://api.github.com/users/palewire/orgs","repos_url":"https://api.github.com/users/palewire/repos","events_url":"https://api.github.com/users/palewire/events{/privacy}","received_events_url":"https://api.github.com/users/palewire/received_events","type":"User","site_admin":false},"parents":[{"sha":"ded94a858c63972043241d6d4e3b229d3d040f21","url":"https://api.github.com/repos/palewire/storytracker/commits/ded94a858c63972043241d6d4e3b229d3d040f21","html_url":"https://github.com/palewire/storytracker/commit/ded94a858c63972043241d6d4e3b229d3d040f21"}],"stats":{"total":103,"additions":97,"deletions":6},"files":[{"sha":"9d956126fe1ad388f3a9c1d8681bb5a1de49b6fd","filename":"storytracker/analysis.py","status":"modified","additions":76,"deletions":3,"changes":79,"blob_url":"https://github.com/palewire/storytracker/blob/83fe31178c6b9fcdef773f520822689b8fe4544f/storytracker%2Fanalysis.py","raw_url":"https://github.com/palewire/storytracker/raw/83fe31178c6b9fcdef773f520822689b8fe4544f/storytracker%2Fanalysis.py","contents_url":"https://api.github.com/repos/palewire/storytracker/contents/storytracker%2Fanalysis.py?ref=83fe31178c6b9fcdef773f520822689b8fe4544f","patch":"@@ -24,6 +24,7 @@ def __init__(self, url, timestamp, html):\n # Attributes that come in handy below\n self.archive_path = None\n self._hyperlinks = []\n+ self._images = []\n \n def __eq__(self, other):\n \"\"\"\n@@ -105,7 +106,7 @@ def get_hyperlinks(self, force=False):\n if self._hyperlinks and not force:\n return self._hyperlinks\n \n- # Target the
tag if it exists since \n+ # Target the tag if it exists since\n # we don't care what's in the \n target = self.soup\n if hasattr(target, 'body'):\n@@ -135,6 +136,40 @@ def get_hyperlinks(self, force=False):\n return link_list\n hyperlinks = property(get_hyperlinks)\n \n+ def get_images(self, force=False):\n+ \"\"\"\n+ Parse the archived HTML for images and returns them as a list\n+ of Image objects.\n+\n+ The list is cached after it is first accessed.\n+\n+ Set the `force` kwargs to True to regenerate it from scratch.\n+ \"\"\"\n+ # If we already have the list, return it\n+ if self._hyperlinks and not force:\n+ return self._hyperlinks\n+\n+ # Target the tag if it exists since\n+ # we don't care what's in the \n+ target = self.soup\n+ if hasattr(target, 'body'):\n+ target = target.body\n+\n+ # Loop through all tags with src attributes\n+ # and convert them to Image objects\n+ image_list = []\n+ for img in target.findAll(\"img\", {\"src\": True}):\n+ # Create the Image object\n+ image_obj = Image(img[\"src\"])\n+\n+ # Add to the image list\n+ image_list.append(image_obj)\n+\n+ # Stuff that list in our cache and then pass it out\n+ self._images = image_list\n+ return image_list\n+ images = property(get_images)\n+\n \n class ArchivedURLSet(list):\n \"\"\"\n@@ -182,9 +217,28 @@ def __init__(self, href, string, images=[]):\n self.domain = urlparse(href).netloc\n self.images = images\n \n+ def __eq__(self, other):\n+ \"\"\"\n+ Tests whether this object is equal to something else.\n+ \"\"\"\n+ if not isinstance(other, Image):\n+ return NotImplemented\n+ if self.href == other.href:\n+ return True\n+ return False\n+\n+ def __ne__(self, other):\n+ \"\"\"\n+ Tests whether this object is unequal to something else.\n+ \"\"\"\n+ result = self.__eq__(other)\n+ if result is NotImplemented:\n+ return result\n+ return not result\n+\n def __unicode__(self):\n if len(self.href) > 40:\n- return six.text_type(self.href[:40] + \"...\")\n+ return six.text_type(\"%s...\" % self.href[:40])\n else:\n return six.text_type(self.href)\n \n@@ -196,8 +250,27 @@ class Image(UnicodeMixin):\n def __init__(self, src):\n self.src = src\n \n+ def __eq__(self, other):\n+ \"\"\"\n+ Tests whether this object is equal to something else.\n+ \"\"\"\n+ if not isinstance(other, Image):\n+ return NotImplemented\n+ if self.src == other.src:\n+ return True\n+ return False\n+\n+ def __ne__(self, other):\n+ \"\"\"\n+ Tests whether this object is unequal to something else.\n+ \"\"\"\n+ result = self.__eq__(other)\n+ if result is NotImplemented:\n+ return result\n+ return not result\n+\n def __unicode__(self):\n if len(self.src) > 40:\n- return six.text_type(self.src[:40] + \"...\")\n+ return six.text_type(\"%s...\" % self.src[:40])\n else:\n return six.text_type(self.src)"},{"sha":"e9b68118ac1ad13464ef0b32b49b379312b4a9b1","filename":"test.py","status":"modified","additions":21,"deletions":3,"changes":24,"blob_url":"https://github.com/palewire/storytracker/blob/83fe31178c6b9fcdef773f520822689b8fe4544f/test.py","raw_url":"https://github.com/palewire/storytracker/raw/83fe31178c6b9fcdef773f520822689b8fe4544f/test.py","contents_url":"https://api.github.com/repos/palewire/storytracker/contents/test.py?ref=83fe31178c6b9fcdef773f520822689b8fe4544f","patch":"@@ -9,7 +9,7 @@\n from bs4 import BeautifulSoup\n from storytracker.analysis import ArchivedURL\n from storytracker.analysis import ArchivedURLSet\n-from storytracker.analysis import Hyperlink\n+from storytracker.analysis import Hyperlink, Image\n \n \n class NullDevice():\n@@ -111,19 +111,37 @@ def test_url_creation(self):\n obj.write_gzip_to_directory(self.tmpdir)\n \n def test_url_hyperlinks(self):\n- obj = storytracker.archive(self.url, output_dir=self.tmpdir)\n+ obj = storytracker.archive(self.url)\n self.assertEqual(obj._hyperlinks, [])\n self.assertTrue(isinstance(obj.hyperlinks, list))\n self.assertEqual(obj._hyperlinks, obj.hyperlinks)\n [self.assertTrue(isinstance(a, Hyperlink)) for a in obj.hyperlinks]\n a = obj.hyperlinks[0]\n a.href\n- a.contents\n+ a.string\n a.domain\n+ if a.images:\n+ for i in a.images:\n+ self.assertTrue(isinstance(i, Image))\n+ i.src\n+ i.__unicode__()\n a.__unicode__()\n a.__str__()\n a.__repr__()\n \n+ def test_url_images(self):\n+ obj = storytracker.archive(self.url)\n+ self.assertEqual(obj._images, [])\n+ self.assertTrue(len(obj.images) > 0)\n+ self.assertTrue(isinstance(obj.images, list))\n+ self.assertEqual(obj._images, obj.images)\n+ [self.assertTrue(isinstance(i, Image)) for i in obj.images]\n+ img = obj.images[0]\n+ img.src\n+ img.__unicode__()\n+ img.__str__()\n+ img.__repr__()\n+\n def test_urlset_creation(self):\n obj = ArchivedURL(self.url, datetime.now(), \"foobar\")\n obj2 = ArchivedURL(self.url, datetime.now(), \"foobar\")"}]}