#! /usr/bin/env python
# twitter-links.py
# Copyright (C) 2008, Christopher L. Conway (cconway@cs.nyu.edu)
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from sys import stdin, stdout
from lxml import etree
from re import sub
from optparse import OptionParser
# Parse the feed from standard input.  Hand lxml the underlying byte stream
# when there is one (Python 3) so the XML parser decodes the document from its
# own encoding declaration instead of the locale's text encoding; Python 2's
# stdin has no ``buffer`` attribute and already yields bytes.
doc = etree.parse(getattr(stdin, "buffer", stdin))
# Matches either a URL (group 1) or an @username reference (group 2).
# A URL may not end in punctuation that normally belongs to the surrounding
# sentence (".", ",", ")", "!" ...), so "see http://x.org/a." links "http://x.org/a".
# \B before "@" rejects e-mail addresses such as "me@example.com".
_LINK_PATTERN = (
    r"""((?:https?|s?ftp|ssh)://[^"\s<>]*[^.,;'">:\s<>)\]!])"""
    r"""|\B@([A-Za-z0-9_]+)"""
)


def _linkify(text, user_base):
    """Return *text* with URLs and @username references wrapped in anchors."""
    def anchor(match):
        url, user = match.group(1), match.group(2)
        if url is not None:
            return '<a href="%s">%s</a>' % (url, url)
        return '@<a href="%s%s">%s</a>' % (user_base, user, user)

    # A single pass with a callback: text already turned into an anchor is
    # never rescanned, so an "@" inside a URL cannot corrupt its href.
    return sub(_LINK_PATTERN, anchor, text)


def addlinks(path, namespaces=None, user_base="https://twitter.com/"):
    """Turn URLs and @username references into HTML links.

    Every element of the module-level ``doc`` selected by the XPath
    expression *path* has its text rewritten in place.  The anchors are stored
    as element text, so they are XML-escaped when the document is written out.

    path       -- XPath expression selecting the elements to rewrite.
    namespaces -- optional prefix-to-URI mapping used by *path*.
    user_base  -- URL prefix placed before a username to form the link to the
                  user page.  NOTE(review): the default assumes Twitter, going
                  by this script's name -- confirm for other sites.
    """
    for node in doc.xpath(path, namespaces=namespaces):
        # An element that is empty (or starts with a child element) has no
        # text; passing None to the regex functions raises TypeError.
        if node.text:
            node.text = _linkify(node.text, user_base)
def stripuser(path, namespaces=None):
    """Remove a leading ``username:`` prefix from matching elements.

    Every element of the module-level ``doc`` selected by the XPath
    expression *path* has its text rewritten in place: a run of letters,
    digits and underscores at the very start of the text, the colon that
    follows it, and any whitespace after the colon are dropped.  Text without
    such a prefix is left unchanged.

    path       -- XPath expression selecting the elements to rewrite.
    namespaces -- optional prefix-to-URI mapping used by *path*.
    """
    for node in doc.xpath(path, namespaces=namespaces):
        # An element that is empty (or starts with a child element) has no
        # text; passing None to the regex functions raises TypeError.
        if node.text:
            node.text = sub(r"^[A-Za-z0-9_]+:\s*", "", node.text)
# Command-line interface: a single boolean switch.
# NOTE(review): the usage line advertises a SITE argument, but nothing in the
# visible code reads ``args`` -- confirm whether it is still meant to be used.
parser = OptionParser(usage="%prog [options] SITE")
parser.add_option(
    "-s",
    "--strip-username",
    dest="strip_username",
    default=False,
    action="store_true",
    help="Strip the username from item title and description",
)
# ``opts`` carries the parsed switches, ``args`` the leftover positionals.
opts, args = parser.parse_args()
# XPath prefix map for Atom documents, shared by every Atom query below.
_ATOM_NS = {'n': 'http://www.w3.org/2005/Atom'}

# Linkify the item bodies: <description> in RSS feeds, <content> in Atom feeds.
addlinks("//rss/channel/item/description")
addlinks("//n:feed/n:entry/n:content", _ATOM_NS)

if opts.strip_username:
    # RSS title/description
    stripuser("//rss/channel/item/title")
    stripuser("//rss/channel/item/description")
    # Atom title/content
    stripuser("//n:feed/n:entry/n:title", namespaces=_ATOM_NS)
    stripuser("//n:feed/n:entry/n:content", namespaces=_ATOM_NS)

# lxml serializes the tree to bytes, which a Python 3 text-mode stdout rejects;
# write to its binary layer when there is one.  Python 2's stdout has no
# ``buffer`` attribute and accepts bytes directly.
doc.write(getattr(stdout, "buffer", stdout))