Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/lxml/isoschematron/__init__.py: 61%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

132 statements  

1"""The ``lxml.isoschematron`` package implements ISO Schematron support on top 

2of the pure-xslt 'skeleton' implementation. 

3""" 

4 

5import sys 

6import os.path 

7from lxml import etree as _etree # due to validator __init__ signature 

8 

9 

# some compat stuff, borrowed from lxml.html
# Each name is probed by a bare reference; a NameError means the builtin does
# not exist, in which case the name is bound to ``str`` so that the rest of
# the module can use ``unicode`` and ``basestring`` unconditionally.
try:
    unicode
except NameError:
    # Python 3
    unicode = str
try:
    basestring
except NameError:
    # Python 3
    basestring = str

21 

22 

# Public names of this package (what ``from lxml.isoschematron import *``
# exports).
__all__ = [
    'extract_xsd',
    'extract_rng',
    'iso_dsdl_include',
    'iso_abstract_expand',
    'iso_svrl_for_xslt1',
    'svrl_validation_errors',
    'schematron_schema_valid',
    'stylesheet_params',
    'Schematron',
]

27 

28 

# Namespace URIs of the schema languages this package deals with.
#FIXME: Maybe lxml should provide a dedicated place for common namespace
#FIXME: definitions?
XML_SCHEMA_NS = "http://www.w3.org/2001/XMLSchema"
RELAXNG_NS = "http://relaxng.org/ns/structure/1.0"
SCHEMATRON_NS = "http://purl.oclc.org/dsdl/schematron"
SVRL_NS = "http://purl.oclc.org/dsdl/svrl"


# Derived helpers: the '{namespace}schema' root tags used to recognise the
# kind of schema document, and the folder holding the bundled resources.
_schematron_root = '{{{}}}schema'.format(SCHEMATRON_NS)
_xml_schema_root = '{{{}}}schema'.format(XML_SCHEMA_NS)
_resources_dir = os.path.join(os.path.split(__file__)[0], 'resources')

42 

43 

# the iso-schematron skeleton implementation steps aka xsl transformations
def _load_xslt(*path_parts):
    """Compile the stylesheet found at ``<resources>/xsl/<path_parts...>``.

    The file is parsed with ``_etree.parse`` and the resulting tree is turned
    into an ``_etree.XSLT`` transformation object.
    """
    stylesheet_path = os.path.join(_resources_dir, 'xsl', *path_parts)
    return _etree.XSLT(_etree.parse(stylesheet_path))


# Step 0: extraction of embedded schematron rules from a host schema.
extract_xsd = _load_xslt('XSD2Schtrn.xsl')
extract_rng = _load_xslt('RNG2Schtrn.xsl')
# Steps 1-3: inclusion, abstract pattern expansion, compilation to XSLT.
iso_dsdl_include = _load_xslt(
    'iso-schematron-xslt1', 'iso_dsdl_include.xsl')
iso_abstract_expand = _load_xslt(
    'iso-schematron-xslt1', 'iso_abstract_expand.xsl')
iso_svrl_for_xslt1 = _load_xslt(
    'iso-schematron-xslt1', 'iso_svrl_for_xslt1.xsl')

58 

59 

# svrl result accessors
# Compiled XPath that selects every ``svrl:failed-assert`` element of the
# document it is applied to.
svrl_validation_errors = _etree.XPath(
    '//svrl:failed-assert',
    namespaces=dict(svrl=SVRL_NS),
)

63 

# RelaxNG validator for schematron schemas
# ``schematron_schema_valid_supported`` tells whether the RelaxNG grammar for
# ISO Schematron could be loaded; it only becomes True once loading succeeded.
schematron_schema_valid_supported = False
try:
    schematron_schema_valid = _etree.RelaxNG(
        file=os.path.join(_resources_dir, 'rng', 'iso-schematron.rng'))
except _etree.RelaxNGParseError:
    # Some distributions delete the file due to licensing issues.
    def schematron_schema_valid(arg):
        """Stand-in used when the grammar file is unavailable; always raises."""
        raise NotImplementedError("Validating the ISO schematron requires iso-schematron.rng")
else:
    schematron_schema_valid_supported = True

74 

75 

def stylesheet_params(**kwargs):
    """Convert keyword args to a dictionary of stylesheet parameters.

    XSL stylesheet parameters must be XPath expressions, i.e.:

    * string expressions, like "'5'"
    * simple (number) expressions, like "5"
    * valid XPath expressions, like "/a/b/text()"

    Every keyword argument is converted according to the first rule that
    matches:

    * a string is wrapped with ``XSLT.strparam()``
    * ``None`` is rejected with a ``TypeError``
    * an ``XPath`` object is passed through unchanged
    * any other value is converted to its string form

    Returns a new dictionary that maps each keyword to its converted value.
    """
    def _as_param(value):
        # The order of the checks matters: strings first, then None.
        if isinstance(value, basestring):
            return _etree.XSLT.strparam(value)
        if value is None:
            raise TypeError('None not allowed as a stylesheet parameter')
        if isinstance(value, _etree.XPath):
            return value
        return unicode(value)

    return {name: _as_param(value) for name, value in kwargs.items()}

101 

102 

# helper function for use in Schematron __init__
def _stylesheet_param_dict(paramsDict, kwargsDict):
    """Merge two parameter mappings and wrap the result as stylesheet
    arguments.

    Entries of ``kwargsDict`` override those of ``paramsDict`` unless their
    value is None, in which case they are skipped.  ``paramsDict`` itself is
    left unmodified; the merged mapping is passed through
    ``stylesheet_params()`` and that result is returned.
    """
    # work on a copy: beware of changing a mutable default arg
    merged = dict(paramsDict)
    merged.update(
        (name, value) for name, value in kwargsDict.items()
        if value is not None)  # None values do not override
    return stylesheet_params(**merged)

116 

117 

class Schematron(_etree._Validator):
    """An ISO Schematron validator.

    Pass a root Element or an ElementTree to turn it into a validator.
    Alternatively, pass a filename as keyword argument 'file' to parse from
    the file system.

    Schematron is a less well known, but very powerful schema language.
    The main idea is to use the capabilities of XPath to put restrictions on
    the structure and the content of XML documents.

    The standard behaviour is to fail on ``failed-assert`` findings only
    (``ASSERTS_ONLY``).  To change this, you can either pass a report filter
    function to the ``error_finder`` parameter (e.g. ``ASSERTS_AND_REPORTS``
    or a custom ``XPath`` object), or subclass isoschematron.Schematron for
    complete control of the validation process.

    Built on the Schematron language 'reference' skeleton pure-xslt
    implementation, the validator is created as an XSLT 1.0 stylesheet using
    these steps:

     0) (Extract from XML Schema or RelaxNG schema)
     1) Process inclusions
     2) Process abstract patterns
     3) Compile the schematron schema to XSLT

    The ``include`` and ``expand`` keyword arguments can be used to switch off
    steps 1) and 2).
    To set parameters for steps 1), 2) and 3) hand parameter dictionaries to the
    keyword arguments ``include_params``, ``expand_params`` or
    ``compile_params``.
    For convenience, the compile-step parameter ``phase`` is also exposed as a
    keyword argument ``phase``. This takes precedence if the parameter is also
    given in the parameter dictionary.

    If ``store_schematron`` is set to True, the (included-and-expanded)
    schematron document tree is stored and available through the ``schematron``
    property.
    If ``store_xslt`` is set to True, the validation XSLT document tree will be
    stored and can be retrieved through the ``validator_xslt`` property.
    With ``store_report`` set to True (default: False), the resulting validation
    report document gets stored and can be accessed as the ``validation_report``
    property.

    If ``validate_schema`` is set to False, the validation of the schema file
    itself is disabled.  Validation happens by default after building the full
    schema, unless the schema validation file cannot be found at import time,
    in which case the validation gets disabled.  Some lxml distributions exclude
    this file due to licensing issues.  ISO-Schematron validation can then still
    be used normally, but the schemas themselves cannot be validated.

    Here is a usage example::

      >>> from lxml import etree
      >>> from lxml.isoschematron import Schematron

      >>> schematron = Schematron(etree.XML('''
      ... <schema xmlns="http://purl.oclc.org/dsdl/schematron" >
      ...   <pattern id="id_only_attribute">
      ...     <title>id is the only permitted attribute name</title>
      ...     <rule context="*">
      ...       <report test="@*[not(name()='id')]">Attribute
      ...         <name path="@*[not(name()='id')]"/> is forbidden<name/>
      ...       </report>
      ...     </rule>
      ...   </pattern>
      ... </schema>'''),
      ... error_finder=Schematron.ASSERTS_AND_REPORTS)

      >>> xml = etree.XML('''
      ... <AAA name="aaa">
      ...   <BBB id="bbb"/>
      ...   <CCC color="ccc"/>
      ... </AAA>
      ... ''')

      >>> schematron.validate(xml)
      False

      >>> xml = etree.XML('''
      ... <AAA id="aaa">
      ...   <BBB id="bbb"/>
      ...   <CCC/>
      ... </AAA>
      ... ''')

      >>> schematron.validate(xml)
      True
    """

    # libxml2 error categorization for validation errors
    _domain = _etree.ErrorDomains.SCHEMATRONV
    _level = _etree.ErrorLevels.ERROR
    _error_type = _etree.ErrorTypes.SCHEMATRONV_ASSERT

    # convenience definitions for common behaviours
    ASSERTS_ONLY = svrl_validation_errors  # Default
    ASSERTS_AND_REPORTS = _etree.XPath(
        '//svrl:failed-assert | //svrl:successful-report',
        namespaces={'svrl': SVRL_NS})

    def _extract(self, element):
        """Extract embedded schematron schema from non-schematron host schema.

        This method will only be called by __init__ if the given schema document
        is not a schematron schema by itself.
        Must return a schematron schema document tree or None.

        An XML Schema root element is handed to ``_extract_xsd``; an element
        whose own namespace is the RelaxNG namespace is handed to
        ``_extract_rng``.  Anything else yields None.
        """
        schematron = None
        if element.tag == _xml_schema_root:
            schematron = self._extract_xsd(element)
        elif element.nsmap.get(element.prefix) == RELAXNG_NS:
            # RelaxNG does not have a single unique root element
            schematron = self._extract_rng(element)
        return schematron

    # customization points
    # etree.XSLT objects that provide the extract, include, expand, compile
    # steps
    _extract_xsd = extract_xsd
    _extract_rng = extract_rng
    _include = iso_dsdl_include
    _expand = iso_abstract_expand
    _compile = iso_svrl_for_xslt1

    # etree.xpath object that determines input document validity when applied to
    # the svrl result report; must return a list of result elements (empty if
    # valid)
    _validation_errors = ASSERTS_ONLY

    def __init__(self, etree=None, file=None, include=True, expand=True,
                 include_params={}, expand_params={}, compile_params={},
                 store_schematron=False, store_xslt=False, store_report=False,
                 phase=None, error_finder=ASSERTS_ONLY,
                 validate_schema=schematron_schema_valid_supported):
        """Build the validating XSLT from a schema document.

        The schema is taken from ``etree`` (an Element or an object providing
        ``getroot()``) or, if ``etree`` is None, parsed from ``file``.

        ``include_params`` and ``expand_params`` are passed to their XSLT
        step as keyword arguments without further conversion, whereas
        ``compile_params`` (merged with ``phase``) is converted with
        ``stylesheet_params()`` first.  The dictionary defaults are never
        modified here: they are only unpacked or copied.

        Raises ``SchematronParseError`` if obtaining the root element fails,
        if the document is neither a schematron schema nor
        schematron-extractable, or if ``validate_schema`` is true and the
        schema does not validate.  Raises ``ValueError`` if no root element
        could be determined.
        """
        super().__init__()

        self._store_report = store_report
        self._schematron = None
        self._validator_xslt = None
        self._validation_report = None
        if error_finder is not self.ASSERTS_ONLY:
            # override the class-level default for this instance only
            self._validation_errors = error_finder

        # parse schema document, may be a schematron schema or an XML Schema or
        # a RelaxNG schema with embedded schematron rules
        root = None
        try:
            if etree is not None:
                if _etree.iselement(etree):
                    root = etree
                else:
                    root = etree.getroot()
            elif file is not None:
                root = _etree.parse(file).getroot()
        except Exception as exc:
            # keep the original failure attached as the explicit cause
            raise _etree.SchematronParseError(
                "No tree or file given: %s" % exc) from exc
        if root is None:
            raise ValueError("Empty tree")
        if root.tag == _schematron_root:
            schematron = root
        else:
            schematron = self._extract(root)
        if schematron is None:
            raise _etree.SchematronParseError(
                "Document is not a schematron schema or schematron-extractable")
        # perform the iso-schematron skeleton implementation steps to get a
        # validating xslt
        if include:
            schematron = self._include(schematron, **include_params)
        if expand:
            schematron = self._expand(schematron, **expand_params)
        if validate_schema and not schematron_schema_valid(schematron):
            raise _etree.SchematronParseError(
                "invalid schematron schema: %s" %
                schematron_schema_valid.error_log)
        if store_schematron:
            self._schematron = schematron
        # add new compile keyword args here if exposing them
        compile_kwargs = {'phase': phase}
        compile_params = _stylesheet_param_dict(compile_params, compile_kwargs)
        validator_xslt = self._compile(schematron, **compile_params)
        if store_xslt:
            self._validator_xslt = validator_xslt
        self._validator = _etree.XSLT(validator_xslt)

    def __call__(self, etree):
        """Validate doc using Schematron.

        Returns true if document is valid, false if not.

        The error log is cleared first.  The validating XSLT is then applied
        to ``etree`` and the result is kept as the validation report if
        ``store_report`` was requested.  Every element found by the
        configured error finder is appended to the error log in serialized
        form.
        """
        self._clear_error_log()
        result = self._validator(etree)
        if self._store_report:
            self._validation_report = result
        errors = self._validation_errors(result)
        if errors:
            # name the document in the log by its URL, if it has one
            if _etree.iselement(etree):
                fname = etree.getroottree().docinfo.URL or '<file>'
            else:
                fname = etree.docinfo.URL or '<file>'
            for error in errors:
                # Does svrl report the line number, anywhere? Don't think so.
                self._append_log_message(
                    domain=self._domain, type=self._error_type,
                    level=self._level, line=0,
                    message=_etree.tostring(error, encoding='unicode'),
                    filename=fname)
            return False
        return True

    @property
    def schematron(self):
        """ISO-schematron schema document (None if object has been initialized
        with store_schematron=False).
        """
        return self._schematron

    @property
    def validator_xslt(self):
        """ISO-schematron skeleton implementation XSLT validator document (None
        if object has been initialized with store_xslt=False).
        """
        return self._validator_xslt

    @property
    def validation_report(self):
        """ISO-schematron validation result report (None if result-storing has
        been turned off).
        """
        return self._validation_report