[csw-devel] SF.net SVN: opencsw:[594] twitter/sweets/sweets.py
skayser at users.sourceforge.net
Thu Jan 12 00:00:58 CET 2012
Revision: 594
http://opencsw.svn.sourceforge.net/opencsw/?rev=594&view=rev
Author: skayser
Date: 2012-01-11 23:00:58 +0000 (Wed, 11 Jan 2012)
Log Message:
-----------
twitter/sweets: cleanup earlier ad-hoc mess, probably introduce some new
Modified Paths:
--------------
twitter/sweets/sweets.py
Modified: twitter/sweets/sweets.py
===================================================================
--- twitter/sweets/sweets.py 2012-01-11 22:20:38 UTC (rev 593)
+++ twitter/sweets/sweets.py 2012-01-11 23:00:58 UTC (rev 594)
@@ -30,6 +30,7 @@
"""Holds twitter response data and offers operations on it."""
def __init__(self, file="", json=""):
self.data = {}
+ self._num_tweets_dropped = 0
try:
if file and os.path.exists(file):
self.data = self._import_file(file)
@@ -125,7 +126,18 @@
"""Get list of tweets where the tweet contains a given string."""
return [ t for t in self.get_tweets() if t['text'].find(string) != -1 ]
+ def num_tweets(self):
+ return len(self.get_tweets())
+ def num_tweets_dropped(self):
+ return self._num_tweets_dropped
+
+ def drop_nonmatching_tweets(self, string):
+ n1 = self.num_tweets()
+ self.set_tweets(self.get_matching_tweets(string))
+ n2 = self.num_tweets()
+        self._num_tweets_dropped += n1 - n2
+
 def twitter_search_mock(query="", state=""):
     """Search twitter (mockup), returns a single-line, JSON search result."""
     f = open("sample-search-results.txt", "r")
@@ -156,19 +168,20 @@
 response = TSResponse(file=outfile)
 while 1:
     try:
-        # Unfortunately, some search results are spam (text in tweet not
-        # related to query), thus we need to filter the search results
         new_response = twitter_search(query, state=response.get_state())
-        new_response.set_tweets(new_response.get_matching_tweets(query))
         if new_response.has_tweets():
+            # Unfortunately, some search results are spam (tweet not related
+            # to query, URLs point to spam sites), drop irrelevant tweets
+            new_response.drop_nonmatching_tweets(query)
             new_response.print_tweets()
             response.update(new_response)
             response.cap(keep)
             response.export(outfile)
-            print "%s new tweets, %s total in archive" % (
-                len(new_response.get_tweets()),
-                len(response.get_tweets()))
+            print "%s new tweets (%s dropped), %s total in archive" % (
+                new_response.num_tweets(),
+                new_response.num_tweets_dropped(),
+                response.num_tweets())
     except urllib2.URLError, e:
         print "Couldn't retrieve URL:", e