Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/bleach/_vendor/html5lib/filters/sanitizer.py: 23%

121 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-03 06:10 +0000

1"""Deprecated from html5lib 1.1. 

2 

3See `here <https://github.com/html5lib/html5lib-python/issues/443>`_ for 

4information about its deprecation; `Bleach <https://github.com/mozilla/bleach>`_ 

5is recommended as a replacement. Please let us know in the aforementioned issue 

6if Bleach is unsuitable for your needs. 

7 

8""" 

9from __future__ import absolute_import, division, unicode_literals 

10 

11import re 

12import warnings 

13from xml.sax.saxutils import escape, unescape 

14 

15from six.moves import urllib_parse as urlparse 

16 

17from . import base 

18from ..constants import namespaces, prefixes 

19 

20__all__ = ["Filter"] 

21 

22 

23_deprecation_msg = ( 

24 "html5lib's sanitizer is deprecated; see " + 

25 "https://github.com/html5lib/html5lib-python/issues/443 and please let " + 

26 "us know if Bleach is unsuitable for your needs" 

27) 

28 

29warnings.warn(_deprecation_msg, DeprecationWarning) 

30 

# Element allow-list as (namespace URI, local name) pairs, grouped by the
# namespace the element lives in.
allowed_elements = (
    # HTML elements
    frozenset((namespaces['html'], tag) for tag in (
        'a', 'abbr', 'acronym', 'address', 'area', 'article', 'aside',
        'audio', 'b', 'big', 'blockquote', 'br', 'button', 'canvas',
        'caption', 'center', 'cite', 'code', 'col', 'colgroup', 'command',
        'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn', 'dialog',
        'dir', 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset',
        'figcaption', 'figure', 'footer', 'font', 'form', 'header', 'h1',
        'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins',
        'keygen', 'kbd', 'label', 'legend', 'li', 'm', 'map', 'menu',
        'meter', 'multicol', 'nav', 'nextid', 'ol', 'output', 'optgroup',
        'option', 'p', 'pre', 'progress', 'q', 's', 'samp', 'section',
        'select', 'small', 'sound', 'source', 'spacer', 'span', 'strike',
        'strong', 'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time',
        'tfoot', 'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video',
    )) |
    # MathML elements
    frozenset((namespaces['mathml'], tag) for tag in (
        'maction', 'math', 'merror', 'mfrac', 'mi', 'mmultiscripts', 'mn',
        'mo', 'mover', 'mpadded', 'mphantom', 'mprescripts', 'mroot',
        'mrow', 'mspace', 'msqrt', 'mstyle', 'msub', 'msubsup', 'msup',
        'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover', 'none',
    )) |
    # SVG elements
    frozenset((namespaces['svg'], tag) for tag in (
        'a', 'animate', 'animateColor', 'animateMotion', 'animateTransform',
        'clipPath', 'circle', 'defs', 'desc', 'ellipse', 'font-face',
        'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern',
        'linearGradient', 'line', 'marker', 'metadata', 'missing-glyph',
        'mpath', 'path', 'polygon', 'polyline', 'radialGradient', 'rect',
        'set', 'stop', 'svg', 'switch', 'text', 'title', 'tspan', 'use',
    ))
)

194 

# Attribute allow-list as (namespace URI or None, local name) pairs.  The
# HTML, MathML and SVG groups overlap (e.g. 'width', 'class'); the frozenset
# collapses the repeats, so each group is listed without internal duplicates.
allowed_attributes = (
    # HTML attributes
    frozenset((None, attr) for attr in (
        'abbr', 'accept', 'accept-charset', 'accesskey', 'action', 'align',
        'alt', 'autocomplete', 'autofocus', 'axis', 'background', 'balance',
        'bgcolor', 'bgproperties', 'border', 'bordercolor',
        'bordercolordark', 'bordercolorlight', 'bottompadding',
        'cellpadding', 'cellspacing', 'ch', 'challenge', 'char', 'charoff',
        'choff', 'charset', 'checked', 'cite', 'class', 'clear', 'color',
        'cols', 'colspan', 'compact', 'contenteditable', 'controls',
        'coords', 'data', 'datafld', 'datapagesize', 'datasrc', 'datetime',
        'default', 'delay', 'dir', 'disabled', 'draggable', 'dynsrc',
        'enctype', 'end', 'face', 'for', 'form', 'frame', 'galleryimg',
        'gutter', 'headers', 'height', 'hidefocus', 'hidden', 'high',
        'href', 'hreflang', 'hspace', 'icon', 'id', 'inputmode', 'ismap',
        'keytype', 'label', 'leftspacing', 'lang', 'list', 'longdesc',
        'loop', 'loopcount', 'loopend', 'loopstart', 'low', 'lowsrc', 'max',
        'maxlength', 'media', 'method', 'min', 'multiple', 'name', 'nohref',
        'noshade', 'nowrap', 'open', 'optimum', 'pattern', 'ping',
        'point-size', 'poster', 'pqg', 'preload', 'prompt', 'radiogroup',
        'readonly', 'rel', 'repeat-max', 'repeat-min', 'replace',
        'required', 'rev', 'rightspacing', 'rows', 'rowspan', 'rules',
        'scope', 'selected', 'shape', 'size', 'span', 'src', 'start',
        'step', 'style', 'summary', 'suppress', 'tabindex', 'target',
        'template', 'title', 'toppadding', 'type', 'unselectable', 'usemap',
        'urn', 'valign', 'value', 'variable', 'volume', 'vspace', 'vrml',
        'width', 'wrap',
    )) |
    # MathML attributes
    frozenset((None, attr) for attr in (
        'actiontype', 'align', 'columnalign', 'columnlines',
        'columnspacing', 'columnspan', 'depth', 'display', 'displaystyle',
        'equalcolumns', 'equalrows', 'fence', 'fontstyle', 'fontweight',
        'frame', 'height', 'linethickness', 'lspace', 'mathbackground',
        'mathcolor', 'mathvariant', 'maxsize', 'minsize', 'other',
        'rowalign', 'rowlines', 'rowspacing', 'rowspan', 'rspace',
        'scriptlevel', 'selection', 'separator', 'stretchy', 'width',
    )) |
    # SVG attributes
    frozenset((None, attr) for attr in (
        'accent-height', 'accumulate', 'additive', 'alphabetic',
        'arabic-form', 'ascent', 'attributeName', 'attributeType',
        'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height',
        'class', 'clip-path', 'color', 'color-rendering', 'content', 'cx',
        'cy', 'd', 'dx', 'dy', 'descent', 'display', 'dur', 'end', 'fill',
        'fill-opacity', 'fill-rule', 'font-family', 'font-size',
        'font-stretch', 'font-style', 'font-variant', 'font-weight', 'from',
        'fx', 'fy', 'g1', 'g2', 'glyph-name', 'gradientUnits', 'hanging',
        'height', 'horiz-adv-x', 'horiz-origin-x', 'id', 'ideographic', 'k',
        'keyPoints', 'keySplines', 'keyTimes', 'lang', 'marker-end',
        'marker-mid', 'marker-start', 'markerHeight', 'markerUnits',
        'markerWidth', 'mathematical', 'max', 'min', 'name', 'offset',
        'opacity', 'orient', 'origin', 'overline-position',
        'overline-thickness', 'panose-1', 'path', 'pathLength', 'points',
        'preserveAspectRatio', 'r', 'refX', 'refY', 'repeatCount',
        'repeatDur', 'requiredExtensions', 'requiredFeatures', 'restart',
        'rotate', 'rx', 'ry', 'slope', 'stemh', 'stemv', 'stop-color',
        'stop-opacity', 'strikethrough-position', 'strikethrough-thickness',
        'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap',
        'stroke-linejoin', 'stroke-miterlimit', 'stroke-opacity',
        'stroke-width', 'systemLanguage', 'target', 'text-anchor', 'to',
        'transform', 'type', 'u1', 'u2', 'underline-position',
        'underline-thickness', 'unicode', 'unicode-range', 'units-per-em',
        'values', 'version', 'viewBox', 'visibility', 'width', 'widths',
        'x', 'x-height', 'x1', 'x2', 'y', 'y1', 'y2', 'zoomAndPan',
    )) |
    # XLink attributes (used by both MathML and SVG)
    frozenset((namespaces['xlink'], attr) for attr in (
        'actuate', 'arcrole', 'href', 'role', 'show', 'title', 'type',
    )) |
    # xml: attributes (used by HTML and SVG)
    frozenset((namespaces['xml'], attr) for attr in (
        'base', 'lang', 'space',
    ))
)

523 

# Attributes whose value is a URI and therefore must pass the protocol check.
attr_val_is_uri = (
    frozenset((None, attr) for attr in (
        'href', 'src', 'cite', 'action', 'longdesc', 'poster', 'background',
        'datasrc', 'dynsrc', 'lowsrc', 'ping',
    )) |
    frozenset((
        (namespaces['xlink'], 'href'),
        (namespaces['xml'], 'base'),
    ))
)

539 

# SVG presentation attributes whose value may contain a ``url(...)`` reference.
svg_attr_val_allows_ref = frozenset((None, attr) for attr in (
    'clip-path', 'color-profile', 'cursor', 'fill', 'filter', 'marker',
    'marker-start', 'marker-mid', 'marker-end', 'mask', 'stroke',
))

553 

# SVG elements on which only local (``#fragment``) xlink:href values are
# acceptable.  Entries are (namespace, local name) pairs with a None namespace.
svg_allow_local_href = frozenset((None, tag) for tag in (
    'altGlyph', 'animate', 'animateColor', 'animateMotion',
    'animateTransform', 'cursor', 'feImage', 'filter', 'linearGradient',
    'pattern', 'radialGradient', 'textpath', 'tref', 'set', 'use',
))

571 

# CSS properties that are passed through unchanged by Filter.sanitize_css.
allowed_css_properties = frozenset([
    'azimuth', 'background-color', 'border-bottom-color', 'border-collapse',
    'border-color', 'border-left-color', 'border-right-color',
    'border-top-color', 'clear', 'color', 'cursor', 'direction', 'display',
    'elevation', 'float', 'font', 'font-family', 'font-size', 'font-style',
    'font-variant', 'font-weight', 'height', 'letter-spacing', 'line-height',
    'overflow', 'pause', 'pause-after', 'pause-before', 'pitch',
    'pitch-range', 'richness', 'speak', 'speak-header', 'speak-numeral',
    'speak-punctuation', 'speech-rate', 'stress', 'text-align',
    'text-decoration', 'text-indent', 'unicode-bidi', 'vertical-align',
    'voice-family', 'volume', 'white-space', 'width',
])

620 

# Keyword values accepted for the background/border/margin/padding
# shorthand families in Filter.sanitize_css.
allowed_css_keywords = frozenset([
    'auto', 'aqua', 'black', 'block', 'blue', 'bold', 'both', 'bottom',
    'brown', 'center', 'collapse', 'dashed', 'dotted', 'fuchsia', 'gray',
    'green', '!important', 'italic', 'left', 'lime', 'maroon', 'medium',
    'none', 'navy', 'normal', 'nowrap', 'olive', 'pointer', 'purple', 'red',
    'right', 'solid', 'silver', 'teal', 'top', 'transparent', 'underline',
    'white', 'yellow',
])

662 

# SVG styling properties that may appear inside a ``style`` attribute.
allowed_svg_properties = frozenset([
    'fill', 'fill-opacity', 'fill-rule', 'stroke', 'stroke-width',
    'stroke-linecap', 'stroke-linejoin', 'stroke-opacity',
])

673 

# URI schemes that URI-valued attributes are allowed to use.
allowed_protocols = frozenset([
    'ed2k', 'ftp', 'http', 'https', 'irc', 'mailto', 'news', 'gopher',
    'nntp', 'telnet', 'webcal', 'xmpp', 'callto', 'feed', 'urn', 'aim',
    'rsync', 'tag', 'ssh', 'sftp', 'rtsp', 'afs', 'data',
])

699 

# Media types accepted inside ``data:`` URIs.
allowed_content_types = frozenset([
    'image/png', 'image/jpeg', 'image/gif', 'image/webp', 'image/bmp',
    'text/plain',
])

708 

709 

# Matches the path component of a ``data:`` URI (everything after the
# scheme): a media type, an optional ``;charset=...`` and/or ``;base64``
# in either order, a comma, then the payload.  The media type is exposed
# as the named group ``content_type``.
data_content_type = re.compile(r'''
    ^
    # Match a content type <application>/<type>
    (?P<content_type>[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+)
    # Match any character set and encoding
    (?:(?:;charset=(?:[-a-zA-Z0-9]+)(?:;(?:base64))?)
      |(?:;(?:base64))?(?:;charset=(?:[-a-zA-Z0-9]+))?)
    # Assume the rest is data
    ,.*
    $
    ''',
    re.VERBOSE)

722 

723 

class Filter(base.Filter):
    """Sanitizes token stream of XHTML+MathML+SVG and of inline style attributes"""

    def __init__(self,
                 source,
                 allowed_elements=allowed_elements,
                 allowed_attributes=allowed_attributes,
                 allowed_css_properties=allowed_css_properties,
                 allowed_css_keywords=allowed_css_keywords,
                 allowed_svg_properties=allowed_svg_properties,
                 allowed_protocols=allowed_protocols,
                 allowed_content_types=allowed_content_types,
                 attr_val_is_uri=attr_val_is_uri,
                 svg_attr_val_allows_ref=svg_attr_val_allows_ref,
                 svg_allow_local_href=svg_allow_local_href):
        """Creates a Filter

        :arg allowed_elements: set of elements to allow--everything else will
            be escaped

        :arg allowed_attributes: set of attributes to allow in
            elements--everything else will be stripped

        :arg allowed_css_properties: set of CSS properties to allow--everything
            else will be stripped

        :arg allowed_css_keywords: set of CSS keywords to allow--everything
            else will be stripped

        :arg allowed_svg_properties: set of SVG properties to allow--everything
            else will be removed

        :arg allowed_protocols: set of allowed protocols for URIs

        :arg allowed_content_types: set of allowed content types for ``data`` URIs.

        :arg attr_val_is_uri: set of attributes that have URI values--values
            that have a scheme not listed in ``allowed_protocols`` are removed

        :arg svg_attr_val_allows_ref: set of SVG attributes that can have
            references

        :arg svg_allow_local_href: set of SVG elements that may only carry
            local (``#fragment``) ``xlink:href`` values--non-local hrefs on
            these elements are removed. Entries may be ``(None, name)``
            tuples (as in the default set) or bare element names.

        """
        super(Filter, self).__init__(source)

        warnings.warn(_deprecation_msg, DeprecationWarning)

        self.allowed_elements = allowed_elements
        self.allowed_attributes = allowed_attributes
        self.allowed_css_properties = allowed_css_properties
        self.allowed_css_keywords = allowed_css_keywords
        self.allowed_svg_properties = allowed_svg_properties
        self.allowed_protocols = allowed_protocols
        self.allowed_content_types = allowed_content_types
        self.attr_val_is_uri = attr_val_is_uri
        self.svg_attr_val_allows_ref = svg_attr_val_allows_ref
        self.svg_allow_local_href = svg_allow_local_href

    def __iter__(self):
        """Yield sanitized tokens, dropping any that sanitize to a falsy value."""
        for token in base.Filter.__iter__(self):
            token = self.sanitize_token(token)
            if token:
                yield token

    # Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and
    # stripping out all attributes not in ALLOWED_ATTRIBUTES. Style attributes
    # are parsed, and a restricted set, specified by ALLOWED_CSS_PROPERTIES and
    # ALLOWED_CSS_KEYWORDS, are allowed through. attributes in ATTR_VAL_IS_URI
    # are scanned, and only URI schemes specified in ALLOWED_PROTOCOLS are
    # allowed.
    #
    #   sanitize_html('<script> do_nasty_stuff() </script>')
    #    => &lt;script> do_nasty_stuff() &lt;/script>
    #   sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
    #    => <a>Click here for $100</a>
    def sanitize_token(self, token):
        """Sanitize a single token.

        Tag tokens are routed to ``allowed_token`` or ``disallowed_token``
        depending on ``allowed_elements``; comment tokens are dropped (``None``
        is returned); every other token is returned unchanged.
        """
        # accommodate filters which use token_type differently
        token_type = token["type"]
        if token_type in ("StartTag", "EndTag", "EmptyTag"):
            name = token["name"]
            namespace = token["namespace"]
            # A tag without a namespace is looked up as an HTML element.
            if ((namespace, name) in self.allowed_elements or
                (namespace is None and
                 (namespaces["html"], name) in self.allowed_elements)):
                return self.allowed_token(token)
            return self.disallowed_token(token)
        if token_type == "Comment":
            # Comments are removed from the stream entirely.
            return None
        return token

    def allowed_token(self, token):
        """Strip unsafe attributes from a token whose element is allowed.

        The token's ``data`` mapping is modified in place and the same token
        object is returned.
        """
        if "data" in token:
            attrs = token["data"]
            attr_names = set(attrs.keys())

            # Remove forbidden attributes
            for to_remove in (attr_names - self.allowed_attributes):
                del token["data"][to_remove]
                attr_names.remove(to_remove)

            # Remove attributes with disallowed URL values
            for attr in (attr_names & self.attr_val_is_uri):
                assert attr in attrs
                # I don't have a clue where this regexp comes from or why it matches those
                # characters, nor why we call unescape. I just know it's always been here.
                # Should you be worried by this comment in a sanitizer? Yes. On the other hand, all
                # this will do is remove *more* than it otherwise would.
                val_unescaped = re.sub("[`\x00-\x20\x7f-\xa0\\s]+", '',
                                       unescape(attrs[attr])).lower()
                # remove replacement characters from unescaped characters
                val_unescaped = val_unescaped.replace("\ufffd", "")
                try:
                    uri = urlparse.urlparse(val_unescaped)
                except ValueError:
                    uri = None
                    del attrs[attr]
                if uri and uri.scheme:
                    if uri.scheme not in self.allowed_protocols:
                        del attrs[attr]
                    elif uri.scheme == 'data':
                        # ``elif``: the attribute must be deleted at most once.
                        # Falling through after the protocol check raised
                        # KeyError when 'data' was not an allowed protocol.
                        m = data_content_type.match(uri.path)
                        if not m:
                            del attrs[attr]
                        elif m.group('content_type') not in self.allowed_content_types:
                            del attrs[attr]

            # Blank out url(...) references that do not point at a local
            # fragment.
            for attr in self.svg_attr_val_allows_ref:
                if attr in attrs:
                    attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                         ' ',
                                         unescape(attrs[attr]))

            # Drop non-local xlink:href on elements restricted to local hrefs.
            # The default set holds (None, name) tuples while token["name"] is
            # a plain string, so both spellings are checked; comparing only the
            # bare string meant the default restriction never applied.
            tag_name = token["name"]
            xlink_href = (namespaces['xlink'], 'href')
            if (((None, tag_name) in self.svg_allow_local_href or
                 tag_name in self.svg_allow_local_href) and
                    xlink_href in attrs and
                    re.search(r'^\s*[^#\s].*', attrs[xlink_href])):
                del attrs[xlink_href]

            if (None, 'style') in attrs:
                attrs[(None, 'style')] = self.sanitize_css(attrs[(None, 'style')])
            token["data"] = attrs
        return token

    def disallowed_token(self, token):
        """Turn a tag token for a disallowed element into a Characters token.

        The tag is re-serialized as text (attribute values passed through
        ``escape``) and stored in ``token["data"]``; the token's ``type`` is
        set to ``"Characters"`` and its ``name`` key is removed.
        """
        token_type = token["type"]
        if token_type == "EndTag":
            token["data"] = "</%s>" % token["name"]
        elif token["data"]:
            assert token_type in ("StartTag", "EmptyTag")
            attrs = []
            for (ns, name), v in token["data"].items():
                # Namespaced attributes are written as prefix:name.
                qualified = name if ns is None else "%s:%s" % (prefixes[ns], name)
                attrs.append(' %s="%s"' % (qualified, escape(v)))
            token["data"] = "<%s%s>" % (token["name"], ''.join(attrs))
        else:
            token["data"] = "<%s>" % token["name"]
        if token.get("selfClosing"):
            token["data"] = token["data"][:-1] + "/>"

        token["type"] = "Characters"

        del token["name"]
        return token

    def sanitize_css(self, style):
        """Return ``style`` reduced to the allowed declarations.

        Returns an empty string when the value fails either structural check;
        otherwise returns the surviving ``prop: value;`` declarations joined
        by single spaces.
        """
        # disallow urls
        style = re.sub(r'url\s*\(\s*[^\s)]+?\s*\)\s*', ' ', style)

        # gauntlet
        if not re.match(r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
            return ''
        if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
            return ''

        clean = []
        for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style):
            if not value:
                continue
            if prop.lower() in self.allowed_css_properties:
                clean.append(prop + ': ' + value + ';')
            elif prop.split('-')[0].lower() in ['background', 'border', 'margin',
                                                'padding']:
                # Shorthand families: every whitespace-separated part must be
                # an allowed keyword, a colour, or a short length.
                for keyword in value.split():
                    if keyword not in self.allowed_css_keywords and \
                            not re.match(r"^(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword):  # noqa
                        break
                else:
                    clean.append(prop + ': ' + value + ';')
            elif prop.lower() in self.allowed_svg_properties:
                clean.append(prop + ': ' + value + ';')

        return ' '.join(clean)

915 

916 return ' '.join(clean)