1"""
2Syndication feed generation library -- used for generating RSS, etc.
3
4Sample usage:
5
6>>> from django.utils import feedgenerator
7>>> feed = feedgenerator.Rss201rev2Feed(
8... title="Poynter E-Media Tidbits",
9... link="http://www.poynter.org/column.asp?id=31",
10... description="A group blog by the sharpest minds in online journalism.",
11... language="en",
12... )
13>>> feed.add_item(
14... title="Hello",
15... link="http://www.holovaty.com/test/",
16... description="Testing."
17... )
18>>> with open('test.rss', 'w') as fp:
19... feed.write(fp, 'utf-8')
20
21For definitions of the different versions of RSS, see:
22https://web.archive.org/web/20110718035220/http://diveintomark.org/archives/2004/02/04/incompatible-rss
23"""
24
25import datetime
26import email
27import mimetypes
28from io import StringIO
29from urllib.parse import urlparse
30
31from django.utils.encoding import iri_to_uri
32from django.utils.xmlutils import SimplerXMLGenerator
33
34
def rfc2822_date(date):
    """
    Format ``date`` as an RFC 2822 date string.

    ``date`` may be a ``datetime.datetime`` or a plain ``datetime.date``; a
    plain date is first promoted to a datetime at midnight of that day. The
    formatting itself is delegated to ``email.utils.format_datetime()``.
    """
    # ``import email`` on its own does not load the ``email.utils`` submodule,
    # so import it explicitly rather than relying on some other module having
    # loaded it as a side effect.
    import email.utils

    if not isinstance(date, datetime.datetime):
        date = datetime.datetime.combine(date, datetime.time())
    return email.utils.format_datetime(date)
39
40
def rfc3339_date(date):
    """
    Return ``date`` in ISO 8601 form, appending "Z" when the value carries no
    UTC offset.

    A plain ``datetime.date`` is treated as midnight of that day.
    """
    if isinstance(date, datetime.datetime):
        moment = date
    else:
        moment = datetime.datetime.combine(date, datetime.time())
    stamp = moment.isoformat()
    # A value with a UTC offset already has it in its ISO representation.
    suffix = "" if moment.utcoffset() is not None else "Z"
    return stamp + suffix
45
46
def get_tag_uri(url, date):
    """
    Build a TagURI from ``url`` and an optional ``date``.

    The result has the shape ``tag:<hostname>[,<YYYY-MM-DD>]:<path>/<fragment>``;
    the date part is left out when ``date`` is None.

    See
    https://web.archive.org/web/20110514113830/http://diveintomark.org/archives/2004/05/28/howto-atom-id
    """
    parts = urlparse(url)
    if date is None:
        date_part = ""
    else:
        date_part = "," + date.strftime("%Y-%m-%d")
    return "tag:%s%s:%s/%s" % (
        parts.hostname,
        date_part,
        parts.path,
        parts.fragment,
    )
59
60
61def _guess_stylesheet_mimetype(url):
62 """
63 Return the given stylesheet's mimetype tuple, using a slightly custom
64 version of Python's mimetypes.guess_type().
65 """
66 mimetypedb = mimetypes.MimeTypes()
67
68 # The official mimetype for XSLT files is technically `application/xslt+xml`
69 # but as of 2024 almost no browser supports that (they all expect text/xsl).
70 # On top of that, windows seems to assume that the type for xsl is text/xml.
71 mimetypedb.readfp(StringIO("text/xsl\txsl\ntext/xsl\txslt"))
72
73 return mimetypedb.guess_type(url)
74
75
class Stylesheet:
    """An RSS stylesheet"""

    def __init__(self, url, mimetype="", media="screen"):
        """
        Remember the stylesheet's URL, mimetype and media.

        An empty ``mimetype`` (the default) means the type is guessed from the
        URL when the ``mimetype`` property is read.
        """
        self.media = media
        self._mimetype = mimetype
        self._url = url

    # ``url`` is a property so that self._url is evaluated as late as
    # possible, which matters for lazy objects such as reverse_lazy(...).
    @property
    def url(self):
        """The stylesheet URL, passed through iri_to_uri()."""
        return iri_to_uri(self._url)

    @property
    def mimetype(self):
        """The explicit mimetype, or a guess based on the URL if none was given."""
        explicit = self._mimetype
        if explicit == "":
            guessed, _encoding = _guess_stylesheet_mimetype(self.url)
            return guessed
        return explicit

    def __str__(self):
        """Render the ``href``/``type``/``media`` pseudo-attributes, skipping None values."""
        parts = [f'href="{self.url}"']
        optional = (("type", self.mimetype), ("media", self.media))
        parts.extend(
            f'{name}="{value}"' for name, value in optional if value is not None
        )
        return " ".join(parts)

    def __repr__(self):
        summary = (self.url, self.mimetype, self.media)
        return repr(summary)
106
107
class SyndicationFeed:
    "Base class for all syndication feeds. Subclasses should provide write()"

    def __init__(
        self,
        title,
        link,
        description,
        language=None,
        author_email=None,
        author_name=None,
        author_link=None,
        subtitle=None,
        categories=None,
        feed_url=None,
        feed_copyright=None,
        feed_guid=None,
        ttl=None,
        stylesheets=None,
        **kwargs,
    ):
        """
        Store the feed-level metadata in ``self.feed`` and start with an empty
        list of items.

        Text values are converted with str() (None is kept as None), links go
        through iri_to_uri(), and ``stylesheets`` entries that are not already
        Stylesheet instances are wrapped in one. Extra keyword arguments are
        merged into ``self.feed``. Raise TypeError if ``stylesheets`` is a
        single Stylesheet or string instead of a collection of them.
        """

        def text_or_none(value):
            return None if value is None else str(value)

        category_names = [str(c) for c in categories] if categories else None

        if stylesheets is not None:
            if isinstance(stylesheets, (Stylesheet, str)):
                raise TypeError(
                    f"stylesheets should be a list, not {stylesheets.__class__}"
                )
            stylesheets = [
                sheet if isinstance(sheet, Stylesheet) else Stylesheet(sheet)
                for sheet in stylesheets
            ]

        feed = {
            "title": text_or_none(title),
            "link": iri_to_uri(link),
            "description": text_or_none(description),
            "language": text_or_none(language),
            "author_email": text_or_none(author_email),
            "author_name": text_or_none(author_name),
            "author_link": iri_to_uri(author_link),
            "subtitle": text_or_none(subtitle),
            "categories": category_names or (),
            "feed_url": iri_to_uri(feed_url),
            "feed_copyright": text_or_none(feed_copyright),
            "id": feed_guid or link,
            "ttl": text_or_none(ttl),
            "stylesheets": stylesheets,
        }
        # Extra keyword arguments win over the standard keys of the same name.
        feed.update(kwargs)
        self.feed = feed
        self.items = []

    def add_item(
        self,
        title,
        link,
        description,
        author_email=None,
        author_name=None,
        author_link=None,
        pubdate=None,
        comments=None,
        unique_id=None,
        unique_id_is_permalink=None,
        categories=(),
        item_copyright=None,
        ttl=None,
        updateddate=None,
        enclosures=None,
        **kwargs,
    ):
        """
        Append an item to the feed. Every argument is expected to be a string,
        with these exceptions: pubdate and updateddate are datetime.datetime
        objects, and enclosures is an iterable of Enclosure instances. Extra
        keyword arguments are stored on the item as well.
        """

        def text_or_none(value):
            return None if value is None else str(value)

        category_names = (
            [text_or_none(c) for c in categories] if categories else None
        )
        entry = {
            "title": text_or_none(title),
            "link": iri_to_uri(link),
            "description": text_or_none(description),
            "author_email": text_or_none(author_email),
            "author_name": text_or_none(author_name),
            "author_link": iri_to_uri(author_link),
            "pubdate": pubdate,
            "updateddate": updateddate,
            "comments": text_or_none(comments),
            "unique_id": text_or_none(unique_id),
            "unique_id_is_permalink": unique_id_is_permalink,
            "enclosures": enclosures or (),
            "categories": category_names or (),
            "item_copyright": text_or_none(item_copyright),
            "ttl": text_or_none(ttl),
        }
        # Extra keyword arguments win over the standard keys of the same name.
        entry.update(kwargs)
        self.items.append(entry)

    def num_items(self):
        """Return how many items have been added to the feed."""
        return len(self.items)

    def root_attributes(self):
        """
        Hook: extra attributes for the root (i.e. feed/channel) element.
        Called from write(). The base implementation provides none.
        """
        return {}

    def add_root_elements(self, handler):
        """
        Hook: add elements inside the root (i.e. feed/channel) element.
        Called from write(). The base implementation adds nothing.
        """

    def add_stylesheets(self, handler):
        """
        Hook: add stylesheet(s) to the feed. Called from write(). The base
        implementation adds nothing.
        """

    def item_attributes(self, item):
        """
        Hook: extra attributes for each item (i.e. item/entry) element. The
        base implementation provides none.
        """
        return {}

    def add_item_elements(self, handler, item):
        """
        Hook: add elements inside each item (i.e. item/entry) element. The
        base implementation adds nothing.
        """

    def write(self, outfile, encoding):
        """
        Write the feed, in the given encoding, to the file-like object
        outfile. Subclasses should override this; the base implementation
        raises NotImplementedError.
        """
        raise NotImplementedError(
            "subclasses of SyndicationFeed must provide a write() method"
        )

    def writeString(self, encoding):
        """
        Render the feed in the given encoding and return it as a string.
        """
        buffer = StringIO()
        self.write(buffer, encoding)
        return buffer.getvalue()

    def latest_post_date(self):
        """
        Return the most recent pubdate or updateddate found on any item. If no
        item has either of these, this returns the current UTC date/time.
        """
        newest = None
        for entry in self.items:
            for key in ("updateddate", "pubdate"):
                candidate = entry.get(key)
                if not candidate:
                    continue
                if newest is None or candidate > newest:
                    newest = candidate

        if newest:
            return newest
        return datetime.datetime.now(tz=datetime.timezone.utc)
281
282
class Enclosure:
    """An RSS enclosure"""

    def __init__(self, url, length, mime_type):
        "All args are expected to be strings"
        self.length = length
        self.mime_type = mime_type
        # The URL is stored after conversion by iri_to_uri().
        self.url = iri_to_uri(url)
290
291
class RssFeed(SyndicationFeed):
    content_type = "application/rss+xml; charset=utf-8"

    def write(self, outfile, encoding):
        """
        Write the whole RSS document to outfile: stylesheets, then the <rss>
        and <channel> elements with the channel metadata and the items.
        """
        xml = SimplerXMLGenerator(outfile, encoding, short_empty_elements=True)
        xml.startDocument()
        # Stylesheets belong after the start of the document and before any tag.
        # https://www.w3.org/Style/styling-XML.en.html
        self.add_stylesheets(xml)
        xml.startElement("rss", self.rss_attributes())
        xml.startElement("channel", self.root_attributes())
        self.add_root_elements(xml)
        self.write_items(xml)
        self.endChannelElement(xml)
        xml.endElement("rss")

    def rss_attributes(self):
        """Return the attributes of the <rss> element."""
        attrs = {"version": self._version}
        attrs["xmlns:atom"] = "http://www.w3.org/2005/Atom"
        return attrs

    def write_items(self, handler):
        """Write one <item> element per feed item."""
        for entry in self.items:
            handler.startElement("item", self.item_attributes(entry))
            self.add_item_elements(handler, entry)
            handler.endElement("item")

    def add_stylesheets(self, handler):
        """Write one xml-stylesheet processing instruction per stylesheet."""
        sheets = self.feed["stylesheets"]
        if not sheets:
            return
        for sheet in sheets:
            handler.processingInstruction("xml-stylesheet", sheet)

    def add_root_elements(self, handler):
        """
        Write the channel-level elements. Title, link, description and
        lastBuildDate are always written; the others only when set.
        """
        feed = self.feed
        emit = handler.addQuickElement

        for tag in ("title", "link", "description"):
            emit(tag, feed[tag])

        feed_url = feed["feed_url"]
        if feed_url is not None:
            emit("atom:link", None, {"rel": "self", "href": feed_url})

        language = feed["language"]
        if language is not None:
            emit("language", language)

        for category in feed["categories"]:
            emit("category", category)

        rights = feed["feed_copyright"]
        if rights is not None:
            emit("copyright", rights)

        emit("lastBuildDate", rfc2822_date(self.latest_post_date()))

        ttl = feed["ttl"]
        if ttl is not None:
            emit("ttl", ttl)

    def endChannelElement(self, handler):
        """Close the <channel> element."""
        handler.endElement("channel")
344
345
class RssUserland091Feed(RssFeed):
    _version = "0.91"

    def add_item_elements(self, handler, item):
        """
        Write an item's title and link, plus its description when it has one.
        """
        emit = handler.addQuickElement
        emit("title", item["title"])
        emit("link", item["link"])

        summary = item["description"]
        if summary is not None:
            emit("description", summary)
354
355
class Rss201rev2Feed(RssFeed):
    # Spec: https://cyber.harvard.edu/rss/rss.html
    _version = "2.0"

    def add_item_elements(self, handler, item):
        """
        Write the child elements of one <item>: title, link, description,
        author, pubDate, comments, guid, ttl, enclosure and categories.
        Everything except title and link is only written when set.

        Raise ValueError if the item has more than one enclosure.
        """
        emit = handler.addQuickElement

        emit("title", item["title"])
        emit("link", item["link"])
        description = item["description"]
        if description is not None:
            emit("description", description)

        # Author: <author> needs an email address; with only a name, fall
        # back to <dc:creator>.
        name = item["author_name"]
        address = item["author_email"]
        if address:
            if name:
                emit("author", "%s (%s)" % (address, name))
            else:
                emit("author", address)
        elif name:
            emit(
                "dc:creator",
                name,
                {"xmlns:dc": "http://purl.org/dc/elements/1.1/"},
            )

        published = item["pubdate"]
        if published is not None:
            emit("pubDate", rfc2822_date(published))

        comments = item["comments"]
        if comments is not None:
            emit("comments", comments)

        unique_id = item["unique_id"]
        if unique_id is not None:
            # isPermaLink is only written for an explicit True/False.
            permalink = item.get("unique_id_is_permalink")
            if isinstance(permalink, bool):
                guid_attrs = {"isPermaLink": str(permalink).lower()}
            else:
                guid_attrs = {}
            emit("guid", unique_id, guid_attrs)

        ttl = item["ttl"]
        if ttl is not None:
            emit("ttl", ttl)

        # Enclosure.
        if item["enclosures"]:
            candidates = list(item["enclosures"])
            if len(candidates) > 1:
                raise ValueError(
                    "RSS feed items may only have one enclosure, see "
                    "http://www.rssboard.org/rss-profile#element-channel-item-enclosure"
                )
            only = candidates[0]
            enclosure_attrs = {
                "url": only.url,
                "length": only.length,
                "type": only.mime_type,
            }
            emit("enclosure", "", enclosure_attrs)

        # Categories.
        for category in item["categories"]:
            emit("category", category)
414
415
class Atom1Feed(SyndicationFeed):
    # Spec: https://tools.ietf.org/html/rfc4287
    content_type = "application/atom+xml; charset=utf-8"
    ns = "http://www.w3.org/2005/Atom"

    def write(self, outfile, encoding):
        """
        Write the whole Atom document to outfile: stylesheets, then the
        <feed> element with the feed metadata and one <entry> per item.
        """
        handler = SimplerXMLGenerator(outfile, encoding, short_empty_elements=True)
        handler.startDocument()
        # Any stylesheet must come after the start of the document but before any tag.
        # https://www.w3.org/Style/styling-XML.en.html
        self.add_stylesheets(handler)
        handler.startElement("feed", self.root_attributes())
        self.add_root_elements(handler)
        self.write_items(handler)
        handler.endElement("feed")

    def add_stylesheets(self, handler):
        """
        Write one xml-stylesheet processing instruction per stylesheet given
        to the feed. Nothing is written when there are no stylesheets.
        """
        for stylesheet in self.feed["stylesheets"] or []:
            handler.processingInstruction("xml-stylesheet", stylesheet)

    def root_attributes(self):
        """
        Return the attributes of the <feed> element: the Atom namespace, plus
        xml:lang when the feed has a language.
        """
        if self.feed["language"] is not None:
            return {"xmlns": self.ns, "xml:lang": self.feed["language"]}
        else:
            return {"xmlns": self.ns}

    def add_root_elements(self, handler):
        """
        Write the feed-level elements. Title, alternate link, id and updated
        are always written; the self link, author, subtitle, categories and
        rights only when set.
        """
        handler.addQuickElement("title", self.feed["title"])
        handler.addQuickElement(
            "link", "", {"rel": "alternate", "href": self.feed["link"]}
        )
        if self.feed["feed_url"] is not None:
            handler.addQuickElement(
                "link", "", {"rel": "self", "href": self.feed["feed_url"]}
            )
        handler.addQuickElement("id", self.feed["id"])
        handler.addQuickElement("updated", rfc3339_date(self.latest_post_date()))
        # The <author> element is only written when there is a name; email
        # and uri are optional children of it.
        if self.feed["author_name"] is not None:
            handler.startElement("author", {})
            handler.addQuickElement("name", self.feed["author_name"])
            if self.feed["author_email"] is not None:
                handler.addQuickElement("email", self.feed["author_email"])
            if self.feed["author_link"] is not None:
                handler.addQuickElement("uri", self.feed["author_link"])
            handler.endElement("author")
        if self.feed["subtitle"] is not None:
            handler.addQuickElement("subtitle", self.feed["subtitle"])
        for cat in self.feed["categories"]:
            handler.addQuickElement("category", "", {"term": cat})
        if self.feed["feed_copyright"] is not None:
            handler.addQuickElement("rights", self.feed["feed_copyright"])

    def write_items(self, handler):
        """Write one <entry> element per feed item."""
        for item in self.items:
            handler.startElement("entry", self.item_attributes(item))
            self.add_item_elements(handler, item)
            handler.endElement("entry")

    def add_item_elements(self, handler, item):
        """
        Write the child elements of one <entry>. Title, alternate link and id
        are always written; the other elements only when set on the item.
        """
        handler.addQuickElement("title", item["title"])
        handler.addQuickElement("link", "", {"href": item["link"], "rel": "alternate"})

        if item["pubdate"] is not None:
            handler.addQuickElement("published", rfc3339_date(item["pubdate"]))

        if item["updateddate"] is not None:
            handler.addQuickElement("updated", rfc3339_date(item["updateddate"]))

        # Author information.
        if item["author_name"] is not None:
            handler.startElement("author", {})
            handler.addQuickElement("name", item["author_name"])
            if item["author_email"] is not None:
                handler.addQuickElement("email", item["author_email"])
            if item["author_link"] is not None:
                handler.addQuickElement("uri", item["author_link"])
            handler.endElement("author")

        # Unique ID: without an explicit one, derive a TagURI from the item's
        # link and publication date.
        if item["unique_id"] is not None:
            unique_id = item["unique_id"]
        else:
            unique_id = get_tag_uri(item["link"], item["pubdate"])
        handler.addQuickElement("id", unique_id)

        # Summary.
        if item["description"] is not None:
            handler.addQuickElement("summary", item["description"], {"type": "html"})

        # Enclosures.
        for enclosure in item["enclosures"]:
            handler.addQuickElement(
                "link",
                "",
                {
                    "rel": "enclosure",
                    "href": enclosure.url,
                    "length": enclosure.length,
                    "type": enclosure.mime_type,
                },
            )

        # Categories.
        for cat in item["categories"]:
            handler.addQuickElement("category", "", {"term": cat})

        # Rights.
        if item["item_copyright"] is not None:
            handler.addQuickElement("rights", item["item_copyright"])
518
519
# Alias for the feed class to use when the caller does not care about the
# concrete format. It keeps the choice of system default in one place, so
# calling code can use "feedgenerator.DefaultFeed" instead of
# "feedgenerator.Rss201rev2Feed".
DefaultFeed = Rss201rev2Feed