Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.10/site-packages/django/utils/feedgenerator.py: 26%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

243 statements  

1""" 

2Syndication feed generation library -- used for generating RSS, etc. 

3 

4Sample usage: 

5 

6>>> from django.utils import feedgenerator 

7>>> feed = feedgenerator.Rss201rev2Feed( 

8... title="Poynter E-Media Tidbits", 

9... link="http://www.poynter.org/column.asp?id=31", 

10... description="A group blog by the sharpest minds in online journalism.", 

11... language="en", 

12... ) 

13>>> feed.add_item( 

14... title="Hello", 

15... link="http://www.holovaty.com/test/", 

16... description="Testing." 

17... ) 

18>>> with open('test.rss', 'w') as fp: 

19... feed.write(fp, 'utf-8') 

20 

21For definitions of the different versions of RSS, see: 

22https://web.archive.org/web/20110718035220/http://diveintomark.org/archives/2004/02/04/incompatible-rss 

23""" 

24 

25import datetime 

26import email 

27import mimetypes 

28from io import StringIO 

29from urllib.parse import urlparse 

30 

31from django.utils.encoding import iri_to_uri 

32from django.utils.xmlutils import SimplerXMLGenerator 

33 

34 

def rfc2822_date(date):
    """
    Format ``date`` as an RFC 2822 date string, as used by the RSS feeds.

    ``date`` may be a ``datetime.datetime`` or a plain ``datetime.date``; a
    plain date is promoted to a datetime at midnight before formatting.
    Naive values are rendered by ``email.utils.format_datetime()`` with a
    ``-0000`` offset.
    """
    # Import the submodule explicitly: ``import email`` alone does not load
    # ``email.utils``, so attribute access on the package only works when
    # some other module happens to have imported the submodule already.
    from email.utils import format_datetime

    if not isinstance(date, datetime.datetime):
        date = datetime.datetime.combine(date, datetime.time())
    return format_datetime(date)

39 

40 

def rfc3339_date(date):
    """
    Format ``date`` as an RFC 3339 timestamp, as used by the Atom feed.

    A plain ``datetime.date`` is promoted to a datetime at midnight. A "Z"
    suffix is appended when the value carries no UTC offset.
    """
    stamp = date
    if not isinstance(stamp, datetime.datetime):
        stamp = datetime.datetime.combine(stamp, datetime.time())
    if stamp.utcoffset() is None:
        return stamp.isoformat() + "Z"
    return stamp.isoformat()

45 

46 

def get_tag_uri(url, date):
    """
    Build a TagURI from ``url`` and an optional ``date``.

    The result combines the URL's hostname, the date as YYYY-MM-DD (omitted
    when ``date`` is None), the URL's path and its fragment.

    See
    https://web.archive.org/web/20110514113830/http://diveintomark.org/archives/2004/05/28/howto-atom-id
    """
    parsed = urlparse(url)
    date_part = "" if date is None else "," + date.strftime("%Y-%m-%d")
    return f"tag:{parsed.hostname}{date_part}:{parsed.path}/{parsed.fragment}"

59 

60 

def _guess_stylesheet_mimetype(url):
    """
    Guess the mimetype tuple for the stylesheet at ``url``.

    Works like Python's mimetypes.guess_type(), but on a private database in
    which the ``xsl`` and ``xslt`` extensions are mapped to ``text/xsl``.
    """
    # The official mimetype for XSLT files is technically `application/xslt+xml`
    # but as of 2024 almost no browser supports that (they all expect text/xsl).
    # On top of that, windows seems to assume that the type for xsl is text/xml.
    xsl_overrides = StringIO("text/xsl\txsl\ntext/xsl\txslt")

    registry = mimetypes.MimeTypes()
    registry.readfp(xsl_overrides)
    return registry.guess_type(url)

74 

75 

class Stylesheet:
    """A stylesheet reference (URL, mimetype, media) for an RSS feed."""

    def __init__(self, url, mimetype="", media="screen"):
        self._url = url
        self._mimetype = mimetype
        self.media = media

    # The URL is exposed through a property so that self._url is evaluated as
    # late as possible, which matters for lazy objects such as the result of
    # reverse_lazy(...).
    @property
    def url(self):
        return iri_to_uri(self._url)

    @property
    def mimetype(self):
        # An explicitly supplied mimetype (anything but "") is returned as is;
        # otherwise the type is guessed from the URL.
        if self._mimetype != "":
            return self._mimetype
        guessed_type, _encoding = _guess_stylesheet_mimetype(self.url)
        return guessed_type

    def __str__(self):
        # href is always present; type and media are skipped when None.
        parts = [f'href="{self.url}"']
        optional = (("type", self.mimetype), ("media", self.media))
        parts.extend(
            f'{name}="{value}"' for name, value in optional if value is not None
        )
        return " ".join(parts)

    def __repr__(self):
        summary = (self.url, self.mimetype, self.media)
        return repr(summary)

106 

107 

class SyndicationFeed:
    "Base class shared by every syndication feed. Subclasses must supply write()."

    def __init__(
        self,
        title,
        link,
        description,
        language=None,
        author_email=None,
        author_name=None,
        author_link=None,
        subtitle=None,
        categories=None,
        feed_url=None,
        feed_copyright=None,
        feed_guid=None,
        ttl=None,
        stylesheets=None,
        **kwargs,
    ):
        """
        Store the feed-level metadata in ``self.feed`` and start with an empty
        ``self.items`` list. Extra keyword arguments are merged into
        ``self.feed``.

        Raise TypeError if ``stylesheets`` is a single Stylesheet or string
        rather than an iterable of them.
        """

        def text_or_none(value):
            return None if value is None else str(value)

        category_names = ()
        if categories:
            # An iterable that turns out to be empty still collapses to ().
            category_names = [str(name) for name in categories] or ()

        if stylesheets is not None:
            if isinstance(stylesheets, (Stylesheet, str)):
                raise TypeError(
                    f"stylesheets should be a list, not {stylesheets.__class__}"
                )
            # Wrap anything that is not already a Stylesheet instance.
            stylesheets = [
                sheet if isinstance(sheet, Stylesheet) else Stylesheet(sheet)
                for sheet in stylesheets
            ]

        feed_info = {
            "title": text_or_none(title),
            "link": iri_to_uri(link),
            "description": text_or_none(description),
            "language": text_or_none(language),
            "author_email": text_or_none(author_email),
            "author_name": text_or_none(author_name),
            "author_link": iri_to_uri(author_link),
            "subtitle": text_or_none(subtitle),
            "categories": category_names,
            "feed_url": iri_to_uri(feed_url),
            "feed_copyright": text_or_none(feed_copyright),
            "id": feed_guid or link,
            "ttl": text_or_none(ttl),
            "stylesheets": stylesheets,
        }
        # Extra keyword arguments win over the standard keys.
        feed_info.update(kwargs)
        self.feed = feed_info
        self.items = []

    def add_item(
        self,
        title,
        link,
        description,
        author_email=None,
        author_name=None,
        author_link=None,
        pubdate=None,
        comments=None,
        unique_id=None,
        unique_id_is_permalink=None,
        categories=(),
        item_copyright=None,
        ttl=None,
        updateddate=None,
        enclosures=None,
        **kwargs,
    ):
        """
        Append one item to the feed. Every argument is expected to be a
        string, except pubdate and updateddate (datetime.datetime objects) and
        enclosures (an iterable of Enclosure instances). Extra keyword
        arguments are stored on the item as well.
        """

        def text_or_none(value):
            return None if value is None else str(value)

        category_names = ()
        if categories:
            category_names = [text_or_none(name) for name in categories] or ()

        entry = {
            "title": text_or_none(title),
            "link": iri_to_uri(link),
            "description": text_or_none(description),
            "author_email": text_or_none(author_email),
            "author_name": text_or_none(author_name),
            "author_link": iri_to_uri(author_link),
            "pubdate": pubdate,
            "updateddate": updateddate,
            "comments": text_or_none(comments),
            "unique_id": text_or_none(unique_id),
            "unique_id_is_permalink": unique_id_is_permalink,
            "enclosures": enclosures or (),
            "categories": category_names,
            "item_copyright": text_or_none(item_copyright),
            "ttl": text_or_none(ttl),
        }
        entry.update(kwargs)
        self.items.append(entry)

    def num_items(self):
        """Return how many items have been added to the feed."""
        return len(self.items)

    def root_attributes(self):
        """
        Hook: extra attributes for the root (i.e. feed/channel) element.
        Called from write(). The base implementation supplies none.
        """
        return {}

    def add_root_elements(self, handler):
        """
        Hook: add elements inside the root (i.e. feed/channel) element.
        Called from write(). The base implementation adds nothing.
        """
        pass

    def add_stylesheets(self, handler):
        """
        Hook: add stylesheet(s) to the feed. Called from write(). The base
        implementation adds nothing.
        """
        pass

    def item_attributes(self, item):
        """
        Hook: extra attributes for each item (i.e. item/entry) element. The
        base implementation supplies none.
        """
        return {}

    def add_item_elements(self, handler, item):
        """
        Hook: add elements inside each item (i.e. item/entry) element. The
        base implementation adds nothing.
        """
        pass

    def write(self, outfile, encoding):
        """
        Write the feed, in the given encoding, to the file-like object
        outfile. Subclasses must override this method.
        """
        raise NotImplementedError(
            "subclasses of SyndicationFeed must provide a write() method"
        )

    def writeString(self, encoding):
        """
        Return the feed, in the given encoding, as a string.
        """
        buffer = StringIO()
        self.write(buffer, encoding)
        return buffer.getvalue()

    def latest_post_date(self):
        """
        Return the most recent pubdate or updateddate found on any item. If
        no item has either value, return the current UTC date/time.
        """
        newest = None
        candidates = (
            item.get(key) for item in self.items for key in ("updateddate", "pubdate")
        )
        for stamp in candidates:
            if stamp and (newest is None or stamp > newest):
                newest = stamp

        return newest or datetime.datetime.now(tz=datetime.timezone.utc)

281 

282 

class Enclosure:
    """An enclosure (URL, length, mime type) attached to an RSS item."""

    def __init__(self, url, length, mime_type):
        "Every argument is expected to be a string."
        self.length = length
        self.mime_type = mime_type
        # The URL is passed through iri_to_uri() before being stored.
        self.url = iri_to_uri(url)

290 

291 

class RssFeed(SyndicationFeed):
    content_type = "application/rss+xml; charset=utf-8"

    def write(self, outfile, encoding):
        """
        Serialize the whole feed as RSS XML to ``outfile`` in ``encoding``.
        """
        xml = SimplerXMLGenerator(outfile, encoding, short_empty_elements=True)
        xml.startDocument()
        # Any stylesheet must come after the start of the document but before
        # any tag.
        # https://www.w3.org/Style/styling-XML.en.html
        self.add_stylesheets(xml)
        xml.startElement("rss", self.rss_attributes())
        xml.startElement("channel", self.root_attributes())
        self.add_root_elements(xml)
        self.write_items(xml)
        self.endChannelElement(xml)
        xml.endElement("rss")

    def rss_attributes(self):
        """Return the attributes for the <rss> element."""
        attributes = {"version": self._version}
        attributes["xmlns:atom"] = "http://www.w3.org/2005/Atom"
        return attributes

    def write_items(self, handler):
        """Write one <item> element per feed item."""
        for entry in self.items:
            handler.startElement("item", self.item_attributes(entry))
            self.add_item_elements(handler, entry)
            handler.endElement("item")

    def add_stylesheets(self, handler):
        """Emit one xml-stylesheet processing instruction per stylesheet."""
        stylesheets = self.feed["stylesheets"]
        if not stylesheets:
            return
        for sheet in stylesheets:
            handler.processingInstruction("xml-stylesheet", sheet)

    def add_root_elements(self, handler):
        """Write the channel-level elements; optional ones are skipped when None."""
        feed = self.feed
        emit = handler.addQuickElement

        emit("title", feed["title"])
        emit("link", feed["link"])
        emit("description", feed["description"])
        if feed["feed_url"] is not None:
            emit("atom:link", None, {"rel": "self", "href": feed["feed_url"]})
        if feed["language"] is not None:
            emit("language", feed["language"])
        for category in feed["categories"]:
            emit("category", category)
        if feed["feed_copyright"] is not None:
            emit("copyright", feed["feed_copyright"])
        emit("lastBuildDate", rfc2822_date(self.latest_post_date()))
        if feed["ttl"] is not None:
            emit("ttl", feed["ttl"])

    def endChannelElement(self, handler):
        """Close the <channel> element."""
        handler.endElement("channel")

344 

345 

class RssUserland091Feed(RssFeed):
    _version = "0.91"

    def add_item_elements(self, handler, item):
        """Write title and link, plus description when it is not None."""
        emit = handler.addQuickElement
        for tag in ("title", "link"):
            emit(tag, item[tag])
        summary = item["description"]
        if summary is not None:
            emit("description", summary)

354 

355 

class Rss201rev2Feed(RssFeed):
    # Spec: https://cyber.harvard.edu/rss/rss.html
    _version = "2.0"

    def add_item_elements(self, handler, item):
        """
        Write the child elements of one RSS 2.0 <item>.

        Raise ValueError if the item carries more than one enclosure.
        """
        emit = handler.addQuickElement

        emit("title", item["title"])
        emit("link", item["link"])
        if item["description"] is not None:
            emit("description", item["description"])

        # Author information: <author> needs an email address; a name on its
        # own is written as <dc:creator> instead.
        author_name = item["author_name"]
        author_email = item["author_email"]
        if author_email:
            if author_name:
                emit("author", f"{author_email} ({author_name})")
            else:
                emit("author", author_email)
        elif author_name:
            emit(
                "dc:creator",
                author_name,
                {"xmlns:dc": "http://purl.org/dc/elements/1.1/"},
            )

        if item["pubdate"] is not None:
            emit("pubDate", rfc2822_date(item["pubdate"]))
        if item["comments"] is not None:
            emit("comments", item["comments"])
        if item["unique_id"] is not None:
            # isPermaLink is only written for a real boolean, not for None.
            permalink_flag = item.get("unique_id_is_permalink")
            guid_attrs = {}
            if isinstance(permalink_flag, bool):
                guid_attrs["isPermaLink"] = "true" if permalink_flag else "false"
            emit("guid", item["unique_id"], guid_attrs)
        if item["ttl"] is not None:
            emit("ttl", item["ttl"])

        # Enclosure.
        if item["enclosures"]:
            attachments = list(item["enclosures"])
            if len(attachments) > 1:
                raise ValueError(
                    "RSS feed items may only have one enclosure, see "
                    "http://www.rssboard.org/rss-profile#element-channel-item-enclosure"
                )
            attachment = attachments[0]
            emit(
                "enclosure",
                "",
                {
                    "url": attachment.url,
                    "length": attachment.length,
                    "type": attachment.mime_type,
                },
            )

        # Categories.
        for category in item["categories"]:
            emit("category", category)

414 

415 

class Atom1Feed(SyndicationFeed):
    """Feed generator that writes Atom 1.0 XML."""

    # Spec: https://tools.ietf.org/html/rfc4287
    content_type = "application/atom+xml; charset=utf-8"
    ns = "http://www.w3.org/2005/Atom"

    def write(self, outfile, encoding):
        """
        Serialize the whole feed as Atom XML to ``outfile`` in ``encoding``.
        """
        handler = SimplerXMLGenerator(outfile, encoding, short_empty_elements=True)
        handler.startDocument()
        # Any stylesheet must come after the start of the document but before
        # any tag.
        # https://www.w3.org/Style/styling-XML.en.html
        self.add_stylesheets(handler)
        handler.startElement("feed", self.root_attributes())
        self.add_root_elements(handler)
        self.write_items(handler)
        handler.endElement("feed")

    def add_stylesheets(self, handler):
        """
        Emit one xml-stylesheet processing instruction per configured
        stylesheet, so that stylesheets given to the feed are not silently
        dropped from Atom output.
        """
        for stylesheet in self.feed.get("stylesheets") or []:
            handler.processingInstruction("xml-stylesheet", stylesheet)

    def root_attributes(self):
        """
        Return the attributes of the <feed> element: the Atom namespace, plus
        xml:lang when a language is set.
        """
        if self.feed["language"] is not None:
            return {"xmlns": self.ns, "xml:lang": self.feed["language"]}
        else:
            return {"xmlns": self.ns}

    def add_root_elements(self, handler):
        """
        Write the feed-level elements. Optional ones are skipped when their
        value is None; the author block is only written when a name is set.
        """
        handler.addQuickElement("title", self.feed["title"])
        handler.addQuickElement(
            "link", "", {"rel": "alternate", "href": self.feed["link"]}
        )
        if self.feed["feed_url"] is not None:
            handler.addQuickElement(
                "link", "", {"rel": "self", "href": self.feed["feed_url"]}
            )
        handler.addQuickElement("id", self.feed["id"])
        handler.addQuickElement("updated", rfc3339_date(self.latest_post_date()))
        if self.feed["author_name"] is not None:
            handler.startElement("author", {})
            handler.addQuickElement("name", self.feed["author_name"])
            if self.feed["author_email"] is not None:
                handler.addQuickElement("email", self.feed["author_email"])
            if self.feed["author_link"] is not None:
                handler.addQuickElement("uri", self.feed["author_link"])
            handler.endElement("author")
        if self.feed["subtitle"] is not None:
            handler.addQuickElement("subtitle", self.feed["subtitle"])
        for cat in self.feed["categories"]:
            handler.addQuickElement("category", "", {"term": cat})
        if self.feed["feed_copyright"] is not None:
            handler.addQuickElement("rights", self.feed["feed_copyright"])

    def write_items(self, handler):
        """Write one <entry> element per feed item."""
        for item in self.items:
            handler.startElement("entry", self.item_attributes(item))
            self.add_item_elements(handler, item)
            handler.endElement("entry")

    def add_item_elements(self, handler, item):
        """
        Write the child elements of one Atom <entry>. Optional elements are
        skipped when their value is None.
        """
        handler.addQuickElement("title", item["title"])
        handler.addQuickElement("link", "", {"href": item["link"], "rel": "alternate"})

        if item["pubdate"] is not None:
            handler.addQuickElement("published", rfc3339_date(item["pubdate"]))

        if item["updateddate"] is not None:
            handler.addQuickElement("updated", rfc3339_date(item["updateddate"]))

        # Author information.
        if item["author_name"] is not None:
            handler.startElement("author", {})
            handler.addQuickElement("name", item["author_name"])
            if item["author_email"] is not None:
                handler.addQuickElement("email", item["author_email"])
            if item["author_link"] is not None:
                handler.addQuickElement("uri", item["author_link"])
            handler.endElement("author")

        # Unique ID: fall back to a TagURI built from the link and pubdate
        # when the item has no explicit unique_id.
        if item["unique_id"] is not None:
            unique_id = item["unique_id"]
        else:
            unique_id = get_tag_uri(item["link"], item["pubdate"])
        handler.addQuickElement("id", unique_id)

        # Summary.
        if item["description"] is not None:
            handler.addQuickElement("summary", item["description"], {"type": "html"})

        # Enclosures: each one is written as its own <link rel="enclosure">.
        for enclosure in item["enclosures"]:
            handler.addQuickElement(
                "link",
                "",
                {
                    "rel": "enclosure",
                    "href": enclosure.url,
                    "length": enclosure.length,
                    "type": enclosure.mime_type,
                },
            )

        # Categories.
        for cat in item["categories"]:
            handler.addQuickElement("category", "", {"term": cat})

        # Rights.
        if item["item_copyright"] is not None:
            handler.addQuickElement("rights", item["item_copyright"])

518 

519 

# Single place where the system-wide default feed class is chosen, so calling
# code can use "feedgenerator.DefaultFeed" rather than naming
# "feedgenerator.Rss201rev2Feed" directly.
DefaultFeed = Rss201rev2Feed