Project

General

Profile

Statistics
| Branch: | Revision:

root / env / lib / python2.7 / site-packages / django / utils / feedgenerator.py @ 1a305335

History | View | Annotate | Download (15 KB)

1
"""
2
Syndication feed generation library -- used for generating RSS, etc.
3

4
Sample usage:
5

6
>>> from django.utils import feedgenerator
7
>>> feed = feedgenerator.Rss201rev2Feed(
8
...     title=u"Poynter E-Media Tidbits",
9
...     link=u"http://www.poynter.org/column.asp?id=31",
10
...     description=u"A group Weblog by the sharpest minds in online media/journalism/publishing.",
11
...     language=u"en",
12
... )
13
>>> feed.add_item(
14
...     title="Hello",
15
...     link=u"http://www.holovaty.com/test/",
16
...     description="Testing."
17
... )
18
>>> fp = open('test.rss', 'w')
19
>>> feed.write(fp, 'utf-8')
20
>>> fp.close()
21

22
For definitions of the different versions of RSS, see:
23
http://diveintomark.org/archives/2004/02/04/incompatible-rss
24
"""
25

    
26
import datetime
27
import urlparse
28
from django.utils.xmlutils import SimplerXMLGenerator
29
from django.utils.encoding import force_unicode, iri_to_uri
30
from django.utils import datetime_safe
31
from django.utils.timezone import is_aware
32

    
33
def rfc2822_date(date):
    """
    Formats ``date`` as an RFC 2822 date-time string using English day and
    month abbreviations.

    ``date`` is first passed through ``datetime_safe.new_datetime()``. Aware
    values get their UTC offset appended as ``+HHMM`` / ``-HHMM``; naive
    values get the ``-0000`` suffix.
    """
    # We can't use strftime() for the names because it produces
    # locale-dependent results, so we map English month and day names manually.
    months = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',)
    days = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
    # Support datetime objects older than 1900
    date = datetime_safe.new_datetime(date)
    # We do this ourselves to be timezone aware, email.Utils is not tz aware.
    dow = days[date.weekday()]
    month = months[date.month - 1]
    # The names are substituted first; the doubled %% directives survive that
    # step and are then expanded by strftime().
    time_str = date.strftime('%s, %%d %s %%Y %%H:%%M:%%S ' % (dow, month))
    if not is_aware(date):
        return time_str + '-0000'
    offset = date.tzinfo.utcoffset(date)
    total_minutes = (offset.days * 24 * 60) + (offset.seconds // 60)
    # divmod() floors towards negative infinity, so a negative offset must be
    # split on its absolute value and the sign added separately; otherwise
    # -03:30 would be rendered as -0430.
    sign = '-' if total_minutes < 0 else '+'
    hour, minute = divmod(abs(total_minutes), 60)
    return time_str + "%s%02d%02d" % (sign, hour, minute)
51

    
52
def rfc3339_date(date):
    """
    Formats ``date`` as an RFC 3339 timestamp.

    ``date`` is first passed through ``datetime_safe.new_datetime()``. Aware
    values get their UTC offset appended as ``+HH:MM`` / ``-HH:MM``; naive
    values get the ``Z`` suffix.
    """
    # Support datetime objects older than 1900
    date = datetime_safe.new_datetime(date)
    if not is_aware(date):
        return date.strftime('%Y-%m-%dT%H:%M:%SZ')
    time_str = date.strftime('%Y-%m-%dT%H:%M:%S')
    offset = date.tzinfo.utcoffset(date)
    total_minutes = (offset.days * 24 * 60) + (offset.seconds // 60)
    # divmod() floors towards negative infinity, so a negative offset must be
    # split on its absolute value and the sign added separately; otherwise
    # -03:30 would be rendered as -04:30.
    sign = '-' if total_minutes < 0 else '+'
    hour, minute = divmod(abs(total_minutes), 60)
    return time_str + "%s%02d:%02d" % (sign, hour, minute)
63

    
64
def get_tag_uri(url, date):
    """
    Builds a TagURI from the hostname, path and fragment of ``url``, with
    ``date`` (when not None) inserted as ``,YYYY-MM-DD`` after the hostname.

    See http://diveintomark.org/archives/2004/05/28/howto-atom-id
    """
    parsed = urlparse.urlparse(url)
    if date is None:
        date_part = ''
    else:
        safe_date = datetime_safe.new_datetime(date)
        date_part = ',%s' % safe_date.strftime('%Y-%m-%d')
    return u'tag:%s%s:%s/%s' % (parsed.hostname, date_part, parsed.path, parsed.fragment)
75

    
76
class SyndicationFeed(object):
    "Base class for all syndication feeds. Subclasses should provide write()"
    def __init__(self, title, link, description, language=None, author_email=None,
            author_name=None, author_link=None, subtitle=None, categories=None,
            feed_url=None, feed_copyright=None, feed_guid=None, ttl=None, **kwargs):
        """
        Stores the feed-level metadata in ``self.feed`` and starts with an
        empty ``self.items`` list.

        Text arguments are converted with ``force_unicode(strings_only=True)``
        and link-like arguments with ``iri_to_uri()``. Any extra keyword
        arguments are merged into ``self.feed`` last, so they can override
        the standard keys.
        """
        to_unicode = lambda s: force_unicode(s, strings_only=True)
        if categories:
            categories = [force_unicode(c) for c in categories]
        if ttl is not None:
            # Force ints to unicode
            ttl = force_unicode(ttl)
        self.feed = {
            'title': to_unicode(title),
            'link': iri_to_uri(link),
            'description': to_unicode(description),
            'language': to_unicode(language),
            'author_email': to_unicode(author_email),
            'author_name': to_unicode(author_name),
            'author_link': iri_to_uri(author_link),
            'subtitle': to_unicode(subtitle),
            'categories': categories or (),
            'feed_url': iri_to_uri(feed_url),
            'feed_copyright': to_unicode(feed_copyright),
            # The feed id falls back to the link as it was passed in.
            'id': feed_guid or link,
            'ttl': ttl,
        }
        self.feed.update(kwargs)
        self.items = []

    def add_item(self, title, link, description, author_email=None,
        author_name=None, author_link=None, pubdate=None, comments=None,
        unique_id=None, enclosure=None, categories=(), item_copyright=None,
        ttl=None, **kwargs):
        """
        Adds an item to the feed. All args are expected to be Python Unicode
        objects except pubdate, which is a datetime.datetime object, and
        enclosure, which is an instance of the Enclosure class.

        Extra keyword arguments are merged into the item dictionary last.
        """
        to_unicode = lambda s: force_unicode(s, strings_only=True)
        if categories:
            categories = [to_unicode(c) for c in categories]
        if ttl is not None:
            # Force ints to unicode
            ttl = force_unicode(ttl)
        item = {
            'title': to_unicode(title),
            'link': iri_to_uri(link),
            'description': to_unicode(description),
            'author_email': to_unicode(author_email),
            'author_name': to_unicode(author_name),
            'author_link': iri_to_uri(author_link),
            'pubdate': pubdate,
            'comments': to_unicode(comments),
            'unique_id': to_unicode(unique_id),
            'enclosure': enclosure,
            'categories': categories or (),
            'item_copyright': to_unicode(item_copyright),
            'ttl': ttl,
        }
        item.update(kwargs)
        self.items.append(item)

    def num_items(self):
        "Returns the number of items added to the feed so far."
        return len(self.items)

    def root_attributes(self):
        """
        Return extra attributes to place on the root (i.e. feed/channel) element.
        Called from write().
        """
        return {}

    def add_root_elements(self, handler):
        """
        Add elements in the root (i.e. feed/channel) element. Called
        from write().
        """
        pass

    def item_attributes(self, item):
        """
        Return extra attributes to place on each item (i.e. item/entry) element.
        """
        return {}

    def add_item_elements(self, handler, item):
        """
        Add elements on each item (i.e. item/entry) element.
        """
        pass

    def write(self, outfile, encoding):
        """
        Outputs the feed in the given encoding to outfile, which is a file-like
        object. Subclasses should override this.
        """
        raise NotImplementedError

    def writeString(self, encoding):
        """
        Returns the feed in the given encoding as a string.
        """
        from StringIO import StringIO
        s = StringIO()
        self.write(s, encoding)
        return s.getvalue()

    def latest_post_date(self):
        """
        Returns the latest item's pubdate. If none of them have a pubdate,
        this returns the current date/time as a naive datetime in UTC.
        """
        pubdates = [item['pubdate'] for item in self.items if item['pubdate'] is not None]
        if pubdates:
            return sorted(pubdates)[-1]
        # The date formatters in this module label naive datetimes as UTC
        # ("-0000" / "Z"), so the fallback must be UTC rather than local time.
        return datetime.datetime.utcnow()
194

    
195
class Enclosure(object):
    "Represents an RSS enclosure"
    def __init__(self, url, length, mime_type):
        """
        All args are expected to be Python Unicode objects.

        ``length`` and ``mime_type`` are stored as given; ``url`` is stored
        after conversion with ``iri_to_uri()``.
        """
        self.length = length
        self.mime_type = mime_type
        self.url = iri_to_uri(url)
201

    
202
class RssFeed(SyndicationFeed):
    "Shared RSS output logic; rss_attributes() reads self._version, which this class does not define."
    mime_type = 'application/rss+xml; charset=utf-8'

    def write(self, outfile, encoding):
        """
        Writes the feed to ``outfile`` via a SimplerXMLGenerator created with
        ``encoding``: an <rss> element wrapping a <channel> that holds the
        root elements followed by the items.
        """
        xml = SimplerXMLGenerator(outfile, encoding)
        xml.startDocument()
        xml.startElement(u"rss", self.rss_attributes())
        xml.startElement(u"channel", self.root_attributes())
        self.add_root_elements(xml)
        self.write_items(xml)
        self.endChannelElement(xml)
        xml.endElement(u"rss")

    def rss_attributes(self):
        "Returns the attributes for the <rss> element: version and the Atom namespace."
        attrs = {}
        attrs[u"version"] = self._version
        attrs[u"xmlns:atom"] = u"http://www.w3.org/2005/Atom"
        return attrs

    def write_items(self, handler):
        "Writes one <item> element per stored item."
        for entry in self.items:
            handler.startElement(u'item', self.item_attributes(entry))
            self.add_item_elements(handler, entry)
            handler.endElement(u"item")

    def add_root_elements(self, handler):
        """
        Writes the channel-level elements. title, link, description and
        lastBuildDate are always written; the others only when a value is set.
        """
        feed = self.feed
        handler.addQuickElement(u"title", feed['title'])
        handler.addQuickElement(u"link", feed['link'])
        handler.addQuickElement(u"description", feed['description'])

        self_url = feed['feed_url']
        if self_url is not None:
            handler.addQuickElement(u"atom:link", None,
                    {u"rel": u"self", u"href": self_url})

        language = feed['language']
        if language is not None:
            handler.addQuickElement(u"language", language)

        for category in feed['categories']:
            handler.addQuickElement(u"category", category)

        copyright_text = feed['feed_copyright']
        if copyright_text is not None:
            handler.addQuickElement(u"copyright", copyright_text)

        build_date = rfc2822_date(self.latest_post_date())
        handler.addQuickElement(u"lastBuildDate", build_date.decode('utf-8'))

        ttl = feed['ttl']
        if ttl is not None:
            handler.addQuickElement(u"ttl", ttl)

    def endChannelElement(self, handler):
        "Closes the <channel> element."
        handler.endElement(u"channel")
243

    
244
class RssUserland091Feed(RssFeed):
    "RSS feed that declares version 0.91 and writes only title, link and description per item."
    _version = u"0.91"

    def add_item_elements(self, handler, item):
        "Writes the item's title and link, plus its description when one is set."
        handler.addQuickElement(u"title", item['title'])
        handler.addQuickElement(u"link", item['link'])
        summary = item['description']
        if summary is not None:
            handler.addQuickElement(u"description", summary)
251

    
252
class Rss201rev2Feed(RssFeed):
    # Spec: http://blogs.law.harvard.edu/tech/rss
    _version = u"2.0"

    def add_item_elements(self, handler, item):
        """
        Writes the child elements of one <item>: title, link, then the
        optional description, author, pubDate, comments, guid, ttl,
        enclosure and categories, in that order.
        """
        handler.addQuickElement(u"title", item['title'])
        handler.addQuickElement(u"link", item['link'])
        if item['description'] is not None:
            handler.addQuickElement(u"description", item['description'])

        # Author information: <author> is used whenever an email is known,
        # <dc:creator> when only a name is available.
        name = item["author_name"]
        email = item["author_email"]
        if email:
            if name:
                author = "%s (%s)" % (email, name)
            else:
                author = email
            handler.addQuickElement(u"author", author)
        elif name:
            handler.addQuickElement(u"dc:creator", name, {u"xmlns:dc": u"http://purl.org/dc/elements/1.1/"})

        if item['pubdate'] is not None:
            published = rfc2822_date(item['pubdate'])
            handler.addQuickElement(u"pubDate", published.decode('utf-8'))

        # Simple optional text elements, written in this fixed order.
        optional_elements = (
            (u"comments", 'comments'),
            (u"guid", 'unique_id'),
            (u"ttl", 'ttl'),
        )
        for tag, key in optional_elements:
            if item[key] is not None:
                handler.addQuickElement(tag, item[key])

        # Enclosure.
        enclosure = item['enclosure']
        if enclosure is not None:
            handler.addQuickElement(u"enclosure", '',
                {u"url": enclosure.url, u"length": enclosure.length,
                    u"type": enclosure.mime_type})

        # Categories.
        for category in item['categories']:
            handler.addQuickElement(u"category", category)
288

    
289
class Atom1Feed(SyndicationFeed):
    # Spec: http://atompub.org/2005/07/11/draft-ietf-atompub-format-10.html
    mime_type = 'application/atom+xml; charset=utf-8'
    ns = u"http://www.w3.org/2005/Atom"

    def write(self, outfile, encoding):
        """
        Writes the feed to ``outfile`` via a SimplerXMLGenerator created with
        ``encoding``: a <feed> element holding the root elements followed by
        the entries.
        """
        xml = SimplerXMLGenerator(outfile, encoding)
        xml.startDocument()
        xml.startElement(u'feed', self.root_attributes())
        self.add_root_elements(xml)
        self.write_items(xml)
        xml.endElement(u"feed")

    def root_attributes(self):
        "Returns the <feed> attributes: the Atom namespace, plus xml:lang when a language is set."
        attrs = {u"xmlns": self.ns}
        language = self.feed['language']
        if language is not None:
            attrs[u"xml:lang"] = language
        return attrs

    def add_root_elements(self, handler):
        """
        Writes the feed-level elements. title, the alternate link, id and
        updated are always written; the others only when a value is set.
        """
        feed = self.feed
        handler.addQuickElement(u"title", feed['title'])
        handler.addQuickElement(u"link", "", {u"rel": u"alternate", u"href": feed['link']})
        if feed['feed_url'] is not None:
            handler.addQuickElement(u"link", "", {u"rel": u"self", u"href": feed['feed_url']})
        handler.addQuickElement(u"id", feed['id'])
        updated = rfc3339_date(self.latest_post_date())
        handler.addQuickElement(u"updated", updated.decode('utf-8'))

        # The <author> element is only written when a name is known; email
        # and uri are optional children.
        if feed['author_name'] is not None:
            handler.startElement(u"author", {})
            handler.addQuickElement(u"name", feed['author_name'])
            if feed['author_email'] is not None:
                handler.addQuickElement(u"email", feed['author_email'])
            if feed['author_link'] is not None:
                handler.addQuickElement(u"uri", feed['author_link'])
            handler.endElement(u"author")

        if feed['subtitle'] is not None:
            handler.addQuickElement(u"subtitle", feed['subtitle'])
        for category in feed['categories']:
            handler.addQuickElement(u"category", "", {u"term": category})
        if feed['feed_copyright'] is not None:
            handler.addQuickElement(u"rights", feed['feed_copyright'])

    def write_items(self, handler):
        "Writes one <entry> element per stored item."
        for entry in self.items:
            handler.startElement(u"entry", self.item_attributes(entry))
            self.add_item_elements(handler, entry)
            handler.endElement(u"entry")

    def add_item_elements(self, handler, item):
        """
        Writes the child elements of one <entry>: title, alternate link,
        updated, author, id, summary, enclosure link, categories and rights,
        in that order. All but title, link and id depend on a value being set.
        """
        handler.addQuickElement(u"title", item['title'])
        handler.addQuickElement(u"link", u"", {u"href": item['link'], u"rel": u"alternate"})
        pubdate = item['pubdate']
        if pubdate is not None:
            handler.addQuickElement(u"updated", rfc3339_date(pubdate).decode('utf-8'))

        # Author information.
        if item['author_name'] is not None:
            handler.startElement(u"author", {})
            handler.addQuickElement(u"name", item['author_name'])
            if item['author_email'] is not None:
                handler.addQuickElement(u"email", item['author_email'])
            if item['author_link'] is not None:
                handler.addQuickElement(u"uri", item['author_link'])
            handler.endElement(u"author")

        # Unique ID: fall back to a TagURI built from the link and pubdate.
        entry_id = item['unique_id']
        if entry_id is None:
            entry_id = get_tag_uri(item['link'], pubdate)
        handler.addQuickElement(u"id", entry_id)

        # Summary.
        if item['description'] is not None:
            handler.addQuickElement(u"summary", item['description'], {u"type": u"html"})

        # Enclosure.
        enclosure = item['enclosure']
        if enclosure is not None:
            handler.addQuickElement(u"link", '',
                {u"rel": u"enclosure",
                 u"href": enclosure.url,
                 u"length": enclosure.length,
                 u"type": enclosure.mime_type})

        # Categories.
        for category in item['categories']:
            handler.addQuickElement(u"category", u"", {u"term": category})

        # Rights.
        if item['item_copyright'] is not None:
            handler.addQuickElement(u"rights", item['item_copyright'])
378

    
379
# Calling code can refer to "feedgenerator.DefaultFeed" rather than naming
# "feedgenerator.Rss201rev2Feed" directly, so the decision of what the system
# default feed class is lives in this one place.
DefaultFeed = Rss201rev2Feed