[csw-devel] SF.net SVN: opencsw:[575] twitter/sweets/sweets.py
skayser at users.sourceforge.net
skayser at users.sourceforge.net
Wed Oct 26 11:51:35 CEST 2011
Revision: 575
http://opencsw.svn.sourceforge.net/opencsw/?rev=575&view=rev
Author: skayser
Date: 2011-10-26 09:51:35 +0000 (Wed, 26 Oct 2011)
Log Message:
-----------
twitter/sweets: add JSON file storage for found tweets
Modified Paths:
--------------
twitter/sweets/sweets.py
Modified: twitter/sweets/sweets.py
===================================================================
--- twitter/sweets/sweets.py 2011-10-25 09:57:29 UTC (rev 574)
+++ twitter/sweets/sweets.py 2011-10-26 09:51:35 UTC (rev 575)
@@ -5,18 +5,19 @@
# can be used for further processing (e.g. to generate RSS)
#
# TODO:
+# * Persist state across invocations to make it cron (and Twitter) friendly
+# * Add logging for debug purposes (URL fetches, responses received)
-# * run: write or append result JSON to output file
# * Rewrite file open/close blocks with "with" once python2.5+ on dev box
#
import json
import urllib
import urllib2
+import os
import sys
+import tempfile
import time
import types
-import StringIO
from optparse import OptionParser,SUPPRESS_HELP
def twitter_search_mock(query="", since=""):
@@ -33,56 +34,99 @@
url_params.append(( "q" , query ))
if since_id: url_params.append(( "since_id" , since_id))
url = url_base + "?" + urllib.urlencode(url_params)
+ print "Querying", url
output = urllib2.urlopen(url).read()
return output
twitter_search = twitter_search_real
def get_tweets(query, state):
    """Trigger twitter search and extract results from JSON response."""
    raw = twitter_search(query, state.get('max_seen'))
    # json.read comes from the legacy python-json module, not stdlib json
    parsed = json.read(raw)
    # Remember the highest tweet id so the next query only asks for newer ones
    state['max_seen'] = parsed.get('max_id')
    return parsed['results']
-def run(interval, query, state={}):
def read_jsonfile(file):
    """Read an object from a JSON file.

    Returns [] when the file does not exist so callers can treat a
    missing archive as an empty tweet list.
    """
    # NOTE(review): parameter name shadows the "file" builtin; kept as-is
    # to preserve the call interface.
    if not os.path.exists(file): return []
    # try/finally (2.4-compatible) so the handle is closed even if read()
    # raises; see the file-header TODO about "with" once on python2.5+.
    f = open(file, "r")
    try:
        content = f.read()
    finally:
        f.close()
    # json.read comes from the legacy python-json module, not stdlib json
    return json.read(content)
+
def write_jsonfile(destfile, data):
    """Atomically write an object to a JSON file.

    Serializes to a temp file first and then rename()s it over destfile,
    so a concurrent reader never sees a partially written file.
    """
    # Create the temp file in the destination's own directory: os.rename()
    # fails with EXDEV across filesystems, and tempfile.mkstemp() defaults
    # to the system temp dir (often a different filesystem).
    (fd, name) = tempfile.mkstemp(dir=os.path.dirname(destfile) or ".")
    f = os.fdopen(fd, 'w')
    # try/finally (2.4-compatible) so the descriptor is closed even if the
    # serialization or write raises.
    try:
        # json.write comes from the legacy python-json module
        f.write(json.write(data).encode('utf-8'))
    finally:
        f.close()
    os.rename(name, destfile)
+
def dedup_tweets(tweets):
    """Traverse list of tweets, de-dup them by ID and return new list.

    Keeps the first occurrence of each ID and preserves input order.
    """
    unique = []
    seen_ids = set()
    for tweet in tweets:
        tid = tweet['id']
        if tid not in seen_ids:
            seen_ids.add(tid)
            unique.append(tweet)
    return unique
+
+def log_tweet(tweet):
+ # Tweet URL #http://twitter.com/#!/<from_user>/status/<id_str>
+ msg = "New tweet: \"%s...\" http://twitter.com/#!/%s/status/%s" % (
+ tweet['text'][0:40],
+ tweet['from_user'],
+ tweet['id_str']
+ )
+ print msg.encode(sys.stdout.encoding, 'replace')
+
+def run(interval, query, outfile, state={}):
"""Run the main loop which queries Twitter and writes the output."""
+ tweets = read_jsonfile(outfile)
while 1:
try:
- results = get_results(query, state)
- for result in results:
- # Tweet URL #http://twitter.com/#!/<from_user>/status/<id_str>
- msg = "%s - http://twitter.com/#!/%s/status/%s" % (
- result['text'],
- result['from_user'],
- result['id_str']
- )
- print msg.encode(sys.stdout.encoding, 'replace')
+ new_tweets = get_tweets(query, state)
+ if new_tweets:
+ tweets.extend(new_tweets)
+ tweets = dedup_tweets(tweets)
+ for t in new_tweets: log_tweet(t)
+ write_jsonfile(outfile, tweets)
+ else:
+ print "No new tweets (%s in archive)" % len(tweets)
except urllib2.URLError, e:
print "Couldn't retrieve URL:", e
except json.ReadException, e:
print "Couldn't read JSON response", e
+ except json.WriteException, e:
+ print "Couldn't write JSON output", e
if hasattr(interval, "__call__"): interval()
else: time.sleep(float(interval))
if __name__ == '__main__':
    usage = "%prog -s <query> [options]"
    op = OptionParser(usage=usage)
    op.add_option("-s", dest="query", help="query term to run on Twitter")
    op.add_option("-t", dest="interval", default=300,
                  help="query every X seconds (def: 300)")
    op.add_option("-o", dest="output", metavar="FILE",
                  help="store tweets in FILE (def: tweets-<query>.json)")
    op.add_option("-d", dest="debug", action="store_true",
                  help=SUPPRESS_HELP)
    (options, args) = op.parse_args()

    required_options = ( "query", )
    for o in required_options:
        if not getattr(options, o):
            # BUG fix: the parser variable was renamed to "op" in this
            # revision but this call still referenced the old "parser"
            # name, raising NameError whenever a required option was
            # missing instead of printing the usage help.
            op.print_help()
            sys.exit(1)

    if not options.output:
        options.output = "tweets-%s.json" % options.query

    try:
        # Debug mode: step the loop manually by pressing Enter instead of
        # sleeping between queries.
        if options.debug: options.interval = sys.stdin.readline
        run(options.interval, options.query, options.output)
    except KeyboardInterrupt: pass
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
More information about the devel
mailing list