root / env / lib / python2.7 / site-packages / django / utils / feedgenerator.py @ 1a305335
History | View | Annotate | Download (15 KB)
1 |
"""
Syndication feed generation library -- used for generating RSS, etc.

Sample usage:

>>> from django.utils import feedgenerator
>>> feed = feedgenerator.Rss201rev2Feed(
...     title=u"Poynter E-Media Tidbits",
...     link=u"http://www.poynter.org/column.asp?id=31",
...     description=u"A group Weblog by the sharpest minds in online media/journalism/publishing.",
...     language=u"en",
... )
>>> feed.add_item(
...     title="Hello",
...     link=u"http://www.holovaty.com/test/",
...     description="Testing."
... )
>>> fp = open('test.rss', 'w')
>>> feed.write(fp, 'utf-8')
>>> fp.close()

For definitions of the different versions of RSS, see:
http://diveintomark.org/archives/2004/02/04/incompatible-rss
"""
|
25 |
|
26 |
import datetime |
27 |
import urlparse |
28 |
from django.utils.xmlutils import SimplerXMLGenerator |
29 |
from django.utils.encoding import force_unicode, iri_to_uri |
30 |
from django.utils import datetime_safe |
31 |
from django.utils.timezone import is_aware |
32 |
|
33 |
def rfc2822_date(date):
    """
    Formats ``date`` as an RFC 2822 style date string, e.g.
    "Mon, 02 Jan 2006 15:04:05 -0700".

    Aware datetimes (as reported by ``is_aware``) are suffixed with their
    numeric UTC offset in "+HHMM" / "-HHMM" form; naive datetimes are
    suffixed with "-0000".
    """
    # We can't use strftime() for the names because it produces
    # locale-dependent results, so we map English month and day names manually.
    months = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',)
    days = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
    # Support datetime objects older than 1900
    date = datetime_safe.new_datetime(date)
    # We do this ourselves to be timezone aware, email.Utils is not tz aware.
    dow = days[date.weekday()]
    month = months[date.month - 1]
    # The names are substituted first; the doubled %% directives survive that
    # pass and are then expanded by strftime().
    time_str = date.strftime('%s, %%d %s %%Y %%H:%%M:%%S ' % (dow, month))
    if is_aware(date):
        offset = date.tzinfo.utcoffset(date)
        timezone = (offset.days * 24 * 60) + (offset.seconds // 60)
        # Format sign and magnitude separately. divmod() on a negative minute
        # count floors towards -infinity, so -210 minutes (UTC-03:30) would
        # come out as (-4, 30), i.e. "-0430" instead of "-0330".
        sign = '-' if timezone < 0 else '+'
        hour, minute = divmod(abs(timezone), 60)
        return time_str + "%s%02d%02d" % (sign, hour, minute)
    else:
        return time_str + '-0000'
51 |
|
52 |
def rfc3339_date(date):
    """
    Formats ``date`` as an RFC 3339 style timestamp.

    Aware datetimes (as reported by ``is_aware``) are rendered as
    "YYYY-MM-DDTHH:MM:SS" followed by their UTC offset in "+HH:MM" /
    "-HH:MM" form; naive datetimes are rendered with a trailing "Z".
    """
    # Support datetime objects older than 1900
    date = datetime_safe.new_datetime(date)
    if is_aware(date):
        time_str = date.strftime('%Y-%m-%dT%H:%M:%S')
        offset = date.tzinfo.utcoffset(date)
        timezone = (offset.days * 24 * 60) + (offset.seconds // 60)
        # Format sign and magnitude separately. divmod() on a negative minute
        # count floors towards -infinity, so -210 minutes (UTC-03:30) would
        # come out as (-4, 30), i.e. "-04:30" instead of "-03:30".
        sign = '-' if timezone < 0 else '+'
        hour, minute = divmod(abs(timezone), 60)
        return time_str + "%s%02d:%02d" % (sign, hour, minute)
    else:
        return date.strftime('%Y-%m-%dT%H:%M:%SZ')
63 |
|
64 |
def get_tag_uri(url, date):
    """
    Builds a TagURI of the form "tag:<host>[,<YYYY-MM-DD>]:<path>/<fragment>"
    from ``url``; the date part is left out when ``date`` is None.

    See http://diveintomark.org/archives/2004/05/28/howto-atom-id
    """
    parsed = urlparse.urlparse(url)
    if date is None:
        date_part = ''
    else:
        day = datetime_safe.new_datetime(date).strftime('%Y-%m-%d')
        date_part = ',%s' % day
    return u'tag:%s%s:%s/%s' % (parsed.hostname, date_part, parsed.path, parsed.fragment)
75 |
|
76 |
class SyndicationFeed(object):
    "Base class for all syndication feeds. Subclasses should provide write()"

    def __init__(self, title, link, description, language=None, author_email=None,
            author_name=None, author_link=None, subtitle=None, categories=None,
            feed_url=None, feed_copyright=None, feed_guid=None, ttl=None, **kwargs):
        """
        Stores the feed-level metadata in ``self.feed`` and starts with an
        empty ``self.items`` list.

        Text fields go through force_unicode(strings_only=True) and the link
        fields through iri_to_uri(). Extra keyword arguments are merged into
        ``self.feed`` last, so they can add to or override the standard keys.
        """
        def to_unicode(value):
            return force_unicode(value, strings_only=True)

        if categories:
            categories = [force_unicode(category) for category in categories]
        if ttl is not None:
            # Force ints to unicode
            ttl = force_unicode(ttl)
        feed = {
            'title': to_unicode(title),
            'link': iri_to_uri(link),
            'description': to_unicode(description),
            'language': to_unicode(language),
            'author_email': to_unicode(author_email),
            'author_name': to_unicode(author_name),
            'author_link': iri_to_uri(author_link),
            'subtitle': to_unicode(subtitle),
            'categories': categories or (),
            'feed_url': iri_to_uri(feed_url),
            'feed_copyright': to_unicode(feed_copyright),
            # The feed id falls back to the link as it was passed in.
            'id': feed_guid or link,
            'ttl': ttl,
        }
        feed.update(kwargs)
        self.feed = feed
        self.items = []

    def add_item(self, title, link, description, author_email=None,
            author_name=None, author_link=None, pubdate=None, comments=None,
            unique_id=None, enclosure=None, categories=(), item_copyright=None,
            ttl=None, **kwargs):
        """
        Adds an item to the feed. All args are expected to be Python Unicode
        objects except pubdate, which is a datetime.datetime object, and
        enclosure, which is an instance of the Enclosure class.

        Extra keyword arguments are merged into the item dictionary last.
        """
        def to_unicode(value):
            return force_unicode(value, strings_only=True)

        if categories:
            categories = [to_unicode(category) for category in categories]
        if ttl is not None:
            # Force ints to unicode
            ttl = force_unicode(ttl)
        entry = {
            'title': to_unicode(title),
            'link': iri_to_uri(link),
            'description': to_unicode(description),
            'author_email': to_unicode(author_email),
            'author_name': to_unicode(author_name),
            'author_link': iri_to_uri(author_link),
            'pubdate': pubdate,
            'comments': to_unicode(comments),
            'unique_id': to_unicode(unique_id),
            'enclosure': enclosure,
            'categories': categories or (),
            'item_copyright': to_unicode(item_copyright),
            'ttl': ttl,
        }
        entry.update(kwargs)
        self.items.append(entry)

    def num_items(self):
        "Returns the number of items added to the feed so far."
        return len(self.items)

    def root_attributes(self):
        """
        Return extra attributes to place on the root (i.e. feed/channel) element.
        Called from write().
        """
        return {}

    def add_root_elements(self, handler):
        """
        Add elements in the root (i.e. feed/channel) element. Called
        from write().
        """

    def item_attributes(self, item):
        """
        Return extra attributes to place on each item (i.e. item/entry) element.
        """
        return {}

    def add_item_elements(self, handler, item):
        """
        Add elements on each item (i.e. item/entry) element.
        """

    def write(self, outfile, encoding):
        """
        Outputs the feed in the given encoding to outfile, which is a file-like
        object. Subclasses should override this.
        """
        raise NotImplementedError

    def writeString(self, encoding):
        """
        Returns the feed in the given encoding as a string.
        """
        from StringIO import StringIO
        buf = StringIO()
        self.write(buf, encoding)
        return buf.getvalue()

    def latest_post_date(self):
        """
        Returns the latest item's pubdate. If none of them have a pubdate,
        this returns the current date/time.
        """
        pubdates = [entry['pubdate'] for entry in self.items
                    if entry['pubdate'] is not None]
        if not pubdates:
            return datetime.datetime.now()
        return sorted(pubdates)[-1]
|
194 |
|
195 |
class Enclosure(object):
    "Represents an RSS enclosure"

    def __init__(self, url, length, mime_type):
        "All args are expected to be Python Unicode objects"
        self.length = length
        self.mime_type = mime_type
        # Only the URL is normalised; length and mime_type are stored as given.
        self.url = iri_to_uri(url)
|
201 |
|
202 |
class RssFeed(SyndicationFeed):
    "Shared RSS output logic; subclasses supply _version and add_item_elements()."
    mime_type = 'application/rss+xml; charset=utf-8'

    def write(self, outfile, encoding):
        "Writes the <rss><channel>...</channel></rss> document to outfile."
        xml = SimplerXMLGenerator(outfile, encoding)
        xml.startDocument()
        xml.startElement(u"rss", self.rss_attributes())
        xml.startElement(u"channel", self.root_attributes())
        self.add_root_elements(xml)
        self.write_items(xml)
        self.endChannelElement(xml)
        xml.endElement(u"rss")

    def rss_attributes(self):
        "Returns the attributes for the <rss> element."
        # _version is not set on this class; it is read from the instance,
        # so a subclass has to provide it.
        attrs = {u"version": self._version}
        attrs[u"xmlns:atom"] = u"http://www.w3.org/2005/Atom"
        return attrs

    def write_items(self, handler):
        "Writes one <item> element per stored feed item."
        for entry in self.items:
            handler.startElement(u'item', self.item_attributes(entry))
            self.add_item_elements(handler, entry)
            handler.endElement(u"item")

    def add_root_elements(self, handler):
        "Writes the channel-level elements; optional ones are skipped when None."
        feed = self.feed
        handler.addQuickElement(u"title", feed['title'])
        handler.addQuickElement(u"link", feed['link'])
        handler.addQuickElement(u"description", feed['description'])
        feed_url = feed['feed_url']
        if feed_url is not None:
            handler.addQuickElement(u"atom:link", None,
                    {u"rel": u"self", u"href": feed_url})
        language = feed['language']
        if language is not None:
            handler.addQuickElement(u"language", language)
        for category in feed['categories']:
            handler.addQuickElement(u"category", category)
        feed_copyright = feed['feed_copyright']
        if feed_copyright is not None:
            handler.addQuickElement(u"copyright", feed_copyright)
        build_date = rfc2822_date(self.latest_post_date()).decode('utf-8')
        handler.addQuickElement(u"lastBuildDate", build_date)
        ttl = feed['ttl']
        if ttl is not None:
            handler.addQuickElement(u"ttl", ttl)

    def endChannelElement(self, handler):
        "Closes the <channel> element."
        handler.endElement(u"channel")
|
243 |
|
244 |
class RssUserland091Feed(RssFeed):
    "RSS feed with version 0.91; items carry title, link and optional description."
    _version = u"0.91"

    def add_item_elements(self, handler, item):
        "Writes title and link, plus description when it is not None."
        handler.addQuickElement(u"title", item['title'])
        handler.addQuickElement(u"link", item['link'])
        description = item['description']
        if description is not None:
            handler.addQuickElement(u"description", description)
251 |
|
252 |
class Rss201rev2Feed(RssFeed):
    # Spec: http://blogs.law.harvard.edu/tech/rss
    _version = u"2.0"

    def add_item_elements(self, handler, item):
        "Writes the child elements of a single <item>."
        handler.addQuickElement(u"title", item['title'])
        handler.addQuickElement(u"link", item['link'])
        description = item['description']
        if description is not None:
            handler.addQuickElement(u"description", description)

        # Author information: <author> is only written when there is an
        # e-mail address; a name on its own goes into <dc:creator>.
        author_name = item["author_name"]
        author_email = item["author_email"]
        if author_email:
            if author_name:
                author = "%s (%s)" % (author_email, author_name)
            else:
                author = author_email
            handler.addQuickElement(u"author", author)
        elif author_name:
            handler.addQuickElement(u"dc:creator", author_name,
                    {u"xmlns:dc": u"http://purl.org/dc/elements/1.1/"})

        pubdate = item['pubdate']
        if pubdate is not None:
            handler.addQuickElement(u"pubDate", rfc2822_date(pubdate).decode('utf-8'))

        # Plain optional elements, written in this order when not None.
        optional_elements = (
            (u"comments", 'comments'),
            (u"guid", 'unique_id'),
            (u"ttl", 'ttl'),
        )
        for tag, key in optional_elements:
            value = item[key]
            if value is not None:
                handler.addQuickElement(tag, value)

        # Enclosure.
        enclosure = item['enclosure']
        if enclosure is not None:
            handler.addQuickElement(u"enclosure", '',
                    {u"url": enclosure.url, u"length": enclosure.length,
                     u"type": enclosure.mime_type})

        # Categories.
        for category in item['categories']:
            handler.addQuickElement(u"category", category)
|
288 |
|
289 |
class Atom1Feed(SyndicationFeed):
    # Spec: http://atompub.org/2005/07/11/draft-ietf-atompub-format-10.html
    mime_type = 'application/atom+xml; charset=utf-8'
    ns = u"http://www.w3.org/2005/Atom"

    def write(self, outfile, encoding):
        "Writes the <feed>...</feed> document to outfile."
        xml = SimplerXMLGenerator(outfile, encoding)
        xml.startDocument()
        xml.startElement(u'feed', self.root_attributes())
        self.add_root_elements(xml)
        self.write_items(xml)
        xml.endElement(u"feed")

    def root_attributes(self):
        "Returns the <feed> attributes: the namespace, plus xml:lang if a language is set."
        attrs = {u"xmlns": self.ns}
        language = self.feed['language']
        if language is not None:
            attrs[u"xml:lang"] = language
        return attrs

    def add_root_elements(self, handler):
        "Writes the feed-level elements; optional ones are skipped when None."
        feed = self.feed
        handler.addQuickElement(u"title", feed['title'])
        handler.addQuickElement(u"link", "", {u"rel": u"alternate", u"href": feed['link']})
        feed_url = feed['feed_url']
        if feed_url is not None:
            handler.addQuickElement(u"link", "", {u"rel": u"self", u"href": feed_url})
        handler.addQuickElement(u"id", feed['id'])
        updated = rfc3339_date(self.latest_post_date()).decode('utf-8')
        handler.addQuickElement(u"updated", updated)

        # The <author> element is only written when there is a name; e-mail
        # and link are nested inside it.
        author_name = feed['author_name']
        if author_name is not None:
            handler.startElement(u"author", {})
            handler.addQuickElement(u"name", author_name)
            author_email = feed['author_email']
            if author_email is not None:
                handler.addQuickElement(u"email", author_email)
            author_link = feed['author_link']
            if author_link is not None:
                handler.addQuickElement(u"uri", author_link)
            handler.endElement(u"author")

        subtitle = feed['subtitle']
        if subtitle is not None:
            handler.addQuickElement(u"subtitle", subtitle)
        for category in feed['categories']:
            handler.addQuickElement(u"category", "", {u"term": category})
        feed_copyright = feed['feed_copyright']
        if feed_copyright is not None:
            handler.addQuickElement(u"rights", feed_copyright)

    def write_items(self, handler):
        "Writes one <entry> element per stored feed item."
        for entry in self.items:
            handler.startElement(u"entry", self.item_attributes(entry))
            self.add_item_elements(handler, entry)
            handler.endElement(u"entry")

    def add_item_elements(self, handler, item):
        "Writes the child elements of a single <entry>."
        handler.addQuickElement(u"title", item['title'])
        handler.addQuickElement(u"link", u"", {u"href": item['link'], u"rel": u"alternate"})
        pubdate = item['pubdate']
        if pubdate is not None:
            handler.addQuickElement(u"updated", rfc3339_date(pubdate).decode('utf-8'))

        # Author information.
        author_name = item['author_name']
        if author_name is not None:
            handler.startElement(u"author", {})
            handler.addQuickElement(u"name", author_name)
            author_email = item['author_email']
            if author_email is not None:
                handler.addQuickElement(u"email", author_email)
            author_link = item['author_link']
            if author_link is not None:
                handler.addQuickElement(u"uri", author_link)
            handler.endElement(u"author")

        # Unique ID: fall back to a tag URI built from the link and pubdate.
        unique_id = item['unique_id']
        if unique_id is None:
            unique_id = get_tag_uri(item['link'], item['pubdate'])
        handler.addQuickElement(u"id", unique_id)

        # Summary.
        description = item['description']
        if description is not None:
            handler.addQuickElement(u"summary", description, {u"type": u"html"})

        # Enclosure.
        enclosure = item['enclosure']
        if enclosure is not None:
            handler.addQuickElement(u"link", '',
                {u"rel": u"enclosure",
                 u"href": enclosure.url,
                 u"length": enclosure.length,
                 u"type": enclosure.mime_type})

        # Categories.
        for category in item['categories']:
            handler.addQuickElement(u"category", u"", {u"term": category})

        # Rights.
        item_copyright = item['item_copyright']
        if item_copyright is not None:
            handler.addQuickElement(u"rights", item_copyright)
378 |
|
379 |
# Alias for the feed class used by default. Calling code can refer to
# "feedgenerator.DefaultFeed" instead of "feedgenerator.Rss201rev2Feed", so the
# choice of default lives in this one place.
DefaultFeed = Rss201rev2Feed