#! /usr/bin/env python

# twitter-links.py
# Copyright (C) 2008, Christopher L. Conway (cconway@cs.nyu.edu)

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Filter an RSS or Atom feed, turning URLs and @mentions into HTML links.

The feed is read from standard input and the rewritten feed is written to
standard output.  With ``-s``/``--strip-username`` a leading ``username:``
prefix is also removed from item titles and descriptions.
"""

from sys import stdin, stdout
from re import sub
from optparse import OptionParser

# Prefix mapping used by every Atom XPath query below.
ATOM_NS = {'n': 'http://www.w3.org/2005/Atom'}

# A URL: one of the listed schemes, "://", then anything up to whitespace,
# a quote or an angle bracket.  The final character class keeps trailing
# punctuation (".", ",", ")", "!" ...) out of the link.
_URL_PATTERN = r'''(?:https?|s?ftp|ssh)://[^"\s<>]*[^.,;'">:\s<>)\]!]'''

# An @mention that is not glued to a preceding word character, so e-mail
# addresses such as "a@b.com" are left alone.
_MENTION_PATTERN = r"\B@(?P<user>[A-Za-z0-9_]+)"

# URLs are tried first at every position; an "@" that is part of a URL is
# therefore consumed by the URL match and never treated as a mention.
_LINK_PATTERN = "(?P<url>%s)|%s" % (_URL_PATTERN, _MENTION_PATTERN)

# Leading "username:" prefix followed by optional whitespace.
_USER_PREFIX_PATTERN = r"^[A-Za-z0-9_]+:\s*"

# The parsed feed.  main() assigns it; addlinks() and stripuser() operate
# on whatever tree is stored here.
doc = None


def _anchor(match):
    """Return the HTML anchor for one match of ``_LINK_PATTERN``."""
    url = match.group("url")
    if url is not None:
        return '<a href="%s">%s</a>' % (url, url)
    user = match.group("user")
    # The "@" stays outside the anchor; only the name itself is the link.
    return '@<a href="http://twitter.com/%s">%s</a>' % (user, user)


def linkify(text):
    """Return *text* with URLs and @mentions wrapped in HTML anchors.

    Both kinds of link are produced in a single pass so that markup
    generated for a URL is never rescanned for mentions.
    """
    return sub(_LINK_PATTERN, _anchor, text)


def strip_username(text):
    """Return *text* without a leading ``username:`` prefix, if it has one."""
    return sub(_USER_PREFIX_PATTERN, "", text)


def addlinks(path, namespaces=None):
    """Linkify the text of every node in ``doc`` selected by XPath *path*.

    *namespaces* is passed through to ``doc.xpath`` as the prefix mapping.
    Nodes without text are left untouched.
    """
    for node in doc.xpath(path, namespaces=namespaces):
        if node.text is not None:
            node.text = linkify(node.text)


def stripuser(path, namespaces=None):
    """Strip the ``username:`` prefix from every node selected by *path*.

    *namespaces* is passed through to ``doc.xpath`` as the prefix mapping.
    Nodes without text are left untouched.
    """
    for node in doc.xpath(path, namespaces=namespaces):
        if node.text is not None:
            node.text = strip_username(node.text)


def main():
    """Parse options, rewrite the feed on stdin and write it to stdout."""
    global doc

    # Imported here rather than at module level so the text helpers stay
    # importable without lxml.
    from lxml import etree

    # Options are parsed before stdin is read so that --help and usage
    # errors do not wait for input.  Positional arguments are accepted
    # but not used.
    parser = OptionParser(usage="%prog [options] SITE")
    parser.add_option("-s", "--strip-username",
                      action="store_true", dest="strip_username",
                      default=False,
                      help="Strip the username from item title and description")
    (opts, args) = parser.parse_args()

    # lxml reads and writes bytes; use the underlying binary streams where
    # the interpreter provides them and fall back to the plain file objects.
    doc = etree.parse(getattr(stdin, "buffer", stdin))

    # For RSS feeds
    addlinks("//rss/channel/item/description")
    # For Atom feeds
    addlinks("//n:feed/n:entry/n:content", ATOM_NS)

    if opts.strip_username:
        # RSS title/description
        stripuser("//rss/channel/item/title")
        stripuser("//rss/channel/item/description")
        # Atom title/description
        stripuser("//n:feed/n:entry/n:title", namespaces=ATOM_NS)
        stripuser("//n:feed/n:entry/n:content", namespaces=ATOM_NS)

    doc.write(getattr(stdout, "buffer", stdout))


if __name__ == "__main__":
    main()