Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/networkx/readwrite/graphml.py: 43%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

419 statements  

1""" 

2******* 

3GraphML 

4******* 

5Read and write graphs in GraphML format. 

6 

7.. warning:: 

8 

9 This parser uses the standard xml library present in Python, which is 

10 insecure - see :external+python:mod:`xml` for additional information. 

11 Only parse GraphML files you trust. 

12 

13This implementation does not support mixed graphs (directed and undirected 

14edges together), hyperedges, nested graphs, or ports. 

15 

16"GraphML is a comprehensive and easy-to-use file format for graphs. It 

17consists of a language core to describe the structural properties of a 

18graph and a flexible extension mechanism to add application-specific 

19data. Its main features include support of 

20 

21 * directed, undirected, and mixed graphs, 

22 * hypergraphs, 

23 * hierarchical graphs, 

24 * graphical representations, 

25 * references to external data, 

26 * application-specific attribute data, and 

27 * light-weight parsers. 

28 

29Unlike many other file formats for graphs, GraphML does not use a 

30custom syntax. Instead, it is based on XML and hence ideally suited as 

31a common denominator for all kinds of services generating, archiving, 

32or processing graphs." 

33 

34http://graphml.graphdrawing.org/ 

35 

36Format 

37------ 

38GraphML is an XML format. See 

39http://graphml.graphdrawing.org/specification.html for the specification and 

40http://graphml.graphdrawing.org/primer/graphml-primer.html 

41for examples. 

42""" 

43 

44import warnings 

45from collections import defaultdict 

46 

47import networkx as nx 

48from networkx.utils import open_file 

49 

# Names exported as the public API of this module.
__all__ = [
    "write_graphml",
    "read_graphml",
    "generate_graphml",
    "write_graphml_xml",
    "write_graphml_lxml",
    "parse_graphml",
    "GraphMLWriter",
    "GraphMLReader",
]

60 

61 

@open_file(1, mode="wb")
def write_graphml_xml(
    G,
    path,
    encoding="utf-8",
    prettyprint=True,
    infer_numeric_types=False,
    named_key_ids=False,
    edge_id_from_attribute=None,
):
    """Serialize graph `G` as GraphML and write the document to `path`.

    The document is assembled in memory by `GraphMLWriter` (standard
    library ``xml`` package) and then dumped to the opened file.

    Parameters
    ----------
    G : graph
        A networkx graph
    path : file or string
        File or filename to write.
        Filenames ending in .gz or .bz2 will be compressed.
    encoding : string (optional)
        Encoding for text data.
    prettyprint : bool (optional)
        If True use line breaks and indenting in output XML.
    infer_numeric_types : boolean
        Determine if numeric types should be generalized.
        For example, if edges have both int and float 'weight' attributes,
        we infer in GraphML that both are floats.
    named_key_ids : bool (optional)
        If True use attr.name as value for key elements' id attribute.
    edge_id_from_attribute : dict key (optional)
        If provided, the graphml edge id is taken from the edge data
        attribute stored under this key. If `None`, or if the key is missing
        from an edge's data, the edge id is the edge key when `G` is a
        MultiGraph and is left unset otherwise.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> nx.write_graphml(G, "test.graphml")

    Notes
    -----
    This implementation does not support mixed graphs (directed
    and undirected edges together), hyperedges, nested graphs, or ports.
    """
    writer_options = {
        "encoding": encoding,
        "prettyprint": prettyprint,
        "infer_numeric_types": infer_numeric_types,
        "named_key_ids": named_key_ids,
        "edge_id_from_attribute": edge_id_from_attribute,
    }
    graphml_writer = GraphMLWriter(**writer_options)
    graphml_writer.add_graph_element(G)
    graphml_writer.dump(path)

115 

116 

@open_file(1, mode="wb")
def write_graphml_lxml(
    G,
    path,
    encoding="utf-8",
    prettyprint=True,
    infer_numeric_types=False,
    named_key_ids=False,
    edge_id_from_attribute=None,
):
    """Serialize graph `G` as GraphML and write it to `path` using lxml.

    The lxml-based writer streams the document to the file and should be
    faster than the version using the xml library. When lxml cannot be
    imported, the call is delegated to `write_graphml_xml`.

    Parameters
    ----------
    G : graph
        A networkx graph
    path : file or string
        File or filename to write.
        Filenames ending in .gz or .bz2 will be compressed.
    encoding : string (optional)
        Encoding for text data.
    prettyprint : bool (optional)
        If True use line breaks and indenting in output XML.
    infer_numeric_types : boolean
        Determine if numeric types should be generalized.
        For example, if edges have both int and float 'weight' attributes,
        we infer in GraphML that both are floats.
    named_key_ids : bool (optional)
        If True use attr.name as value for key elements' id attribute.
    edge_id_from_attribute : dict key (optional)
        If provided, the graphml edge id is taken from the edge data
        attribute stored under this key. If `None`, or if the key is missing
        from an edge's data, the edge id is the edge key when `G` is a
        MultiGraph and is left unset otherwise.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> nx.write_graphml_lxml(G, "fourpath.graphml")

    Notes
    -----
    This implementation does not support mixed graphs (directed
    and undirected edges together), hyperedges, nested graphs, or ports.
    """
    try:
        # Imported only to find out whether lxml is installed.
        import lxml.etree  # noqa: F401
    except ImportError:
        # `G` and `path` stay positional so the `open_file` decorator of the
        # fallback sees the already-opened file at the same position.
        return write_graphml_xml(
            G,
            path,
            encoding=encoding,
            prettyprint=prettyprint,
            infer_numeric_types=infer_numeric_types,
            named_key_ids=named_key_ids,
            edge_id_from_attribute=edge_id_from_attribute,
        )

    lxml_writer = GraphMLWriterLxml(
        path,
        graph=G,
        encoding=encoding,
        prettyprint=prettyprint,
        infer_numeric_types=infer_numeric_types,
        named_key_ids=named_key_ids,
        edge_id_from_attribute=edge_id_from_attribute,
    )
    lxml_writer.dump()

187 

188 

def generate_graphml(
    G,
    encoding="utf-8",
    prettyprint=True,
    named_key_ids=False,
    edge_id_from_attribute=None,
    infer_numeric_types=False,
):
    """Generate GraphML lines for G

    Parameters
    ----------
    G : graph
        A networkx graph
    encoding : string (optional)
        Encoding for text data.
    prettyprint : bool (optional)
        If True use line breaks and indenting in output XML.
    named_key_ids : bool (optional)
        If True use attr.name as value for key elements' id attribute.
    edge_id_from_attribute : dict key (optional)
        If provided, the graphml edge id is taken from the edge data
        attribute stored under this key. If `None`, or if the key is missing
        from an edge's data, the edge id is the edge key when `G` is a
        MultiGraph and is left unset otherwise.
    infer_numeric_types : bool (optional)
        Determine if numeric types should be generalized, exactly as for
        `write_graphml_xml`. For example, if edges have both int and float
        'weight' attributes, both are declared as floats. It is the last
        parameter so that existing positional calls keep their meaning.

    Yields
    ------
    str
        The lines of the serialized GraphML document, without line endings.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> linefeed = chr(10)  # the newline character
    >>> s = linefeed.join(nx.generate_graphml(G))
    >>> for line in nx.generate_graphml(G):  # doctest: +SKIP
    ...     print(line)

    Notes
    -----
    This implementation does not support mixed graphs (directed and
    undirected edges together), hyperedges, nested graphs, or ports.
    """
    writer = GraphMLWriter(
        encoding=encoding,
        prettyprint=prettyprint,
        infer_numeric_types=infer_numeric_types,
        named_key_ids=named_key_ids,
        edge_id_from_attribute=edge_id_from_attribute,
    )
    writer.add_graph_element(G)
    yield from str(writer).splitlines()

234 

235 

@open_file(0, mode="rb")
@nx._dispatchable(graphs=None, returns_graph=True)
def read_graphml(path, node_type=str, edge_key_type=int, force_multigraph=False):
    """Read the first graph of a GraphML document from `path`.

    Parameters
    ----------
    path : file or string
        Filename or file handle to read.
        Filenames ending in .gz or .bz2 will be decompressed.

    node_type: Python type (default: str)
        Convert node ids to this type

    edge_key_type: Python type (default: int)
        Convert graphml edge ids to this type. Multigraphs use id as edge key.
        Non-multigraphs add to edge attribute dict with name "id".

    force_multigraph : bool (default: False)
        If True, return a multigraph with edge keys. If False (the default)
        return a multigraph when multiedges are in the graph.

    Returns
    -------
    graph: NetworkX graph
        If parallel edges are present or `force_multigraph=True` then
        a MultiGraph or MultiDiGraph is returned. Otherwise a Graph/DiGraph.
        The returned graph is directed if the file indicates it should be.

    Raises
    ------
    NetworkXError
        If no graph is found, even after retrying with the GraphML
        namespace added to a bare ``<graphml>`` root tag.

    Notes
    -----
    Default node and edge attributes are not propagated to each node and edge.
    They can be obtained from `G.graph` and applied to node and edge attributes
    if desired using something like this:

    >>> default_color = G.graph["node_default"]["color"]  # doctest: +SKIP
    >>> for node, data in G.nodes(data=True):  # doctest: +SKIP
    ...     if "color" not in data:
    ...         data["color"] = default_color
    >>> default_color = G.graph["edge_default"]["color"]  # doctest: +SKIP
    >>> for u, v, data in G.edges(data=True):  # doctest: +SKIP
    ...     if "color" not in data:
    ...         data["color"] = default_color

    This implementation does not support mixed graphs (directed and
    undirected edges together), hypergraphs, nested graphs, or ports.

    For multigraphs the GraphML edge "id" will be used as the edge
    key. If not specified then the "key" attribute will be used. If
    there is no "key" attribute a default NetworkX multigraph edge key
    will be provided.

    Files with the yEd "yfiles" extension can be read. The type of the node's
    shape is preserved in the `shape_type` node attribute.

    yEd compressed files ("file.graphmlz" extension) can be read by renaming
    the file to "file.graphml.gz".

    """
    reader = GraphMLReader(node_type, edge_key_type, force_multigraph)
    # A document may hold several graphs; only the first one is returned.
    graphs = list(reader(path=path))
    if not graphs:
        # Nothing matched the namespaced <graph> tag: the root element may
        # lack the GraphML namespace, so add it and parse the bytes again.
        header = b'<graphml xmlns="http://graphml.graphdrawing.org/xmlns">'
        path.seek(0)
        patched_bytes = path.read().replace(b"<graphml>", header)
        graphs = list(reader(string=patched_bytes))
    if not graphs:
        raise nx.NetworkXError("file not successfully read as graphml")
    return graphs[0]

308 

309 

@nx._dispatchable(graphs=None, returns_graph=True)
def parse_graphml(
    graphml_string, node_type=str, edge_key_type=int, force_multigraph=False
):
    """Read graph in GraphML format from string.

    Parameters
    ----------
    graphml_string : string
        String containing graphml information
        (e.g., contents of a graphml file). A bytes object is accepted too.

    node_type: Python type (default: str)
        Convert node ids to this type

    edge_key_type: Python type (default: int)
        Convert graphml edge ids to this type. Multigraphs use id as edge key.
        Non-multigraphs add to edge attribute dict with name "id".

    force_multigraph : bool (default: False)
        If True, return a multigraph with edge keys. If False (the default)
        return a multigraph when multiedges are in the graph.


    Returns
    -------
    graph: NetworkX graph
        If no parallel edges are found a Graph or DiGraph is returned.
        Otherwise a MultiGraph or MultiDiGraph is returned.

    Raises
    ------
    NetworkXError
        If no graph is found, even after retrying with the GraphML
        namespace added to a bare ``<graphml>`` root tag.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> linefeed = chr(10)  # the newline character
    >>> s = linefeed.join(nx.generate_graphml(G))
    >>> H = nx.parse_graphml(s)

    Notes
    -----
    Default node and edge attributes are not propagated to each node and edge.
    They can be obtained from `G.graph` and applied to node and edge attributes
    if desired using something like this:

    >>> default_color = G.graph["node_default"]["color"]  # doctest: +SKIP
    >>> for node, data in G.nodes(data=True):  # doctest: +SKIP
    ...     if "color" not in data:
    ...         data["color"] = default_color
    >>> default_color = G.graph["edge_default"]["color"]  # doctest: +SKIP
    >>> for u, v, data in G.edges(data=True):  # doctest: +SKIP
    ...     if "color" not in data:
    ...         data["color"] = default_color

    This implementation does not support mixed graphs (directed and
    undirected edges together), hypergraphs, nested graphs, or ports.

    For multigraphs the GraphML edge "id" will be used as the edge
    key. If not specified then the "key" attribute will be used. If
    there is no "key" attribute a default NetworkX multigraph edge key
    will be provided.

    """
    reader = GraphMLReader(node_type, edge_key_type, force_multigraph)
    # A document may hold several graphs; only the first one is returned.
    graphs = list(reader(string=graphml_string))
    if not graphs:
        # Nothing matched the namespaced <graph> tag: the root element may
        # lack the GraphML namespace, so add it and parse again.
        bare_tag = "<graphml>"
        header = '<graphml xmlns="http://graphml.graphdrawing.org/xmlns">'
        if isinstance(graphml_string, (bytes, bytearray)):
            # `replace` needs arguments of the same kind as the document;
            # without this a bytes document failed here with a TypeError.
            bare_tag = bare_tag.encode("ascii")
            header = header.encode("ascii")
        graphs = list(reader(string=graphml_string.replace(bare_tag, header)))
    if not graphs:
        raise nx.NetworkXError("file not successfully read as graphml")
    return graphs[0]

382 

383 

class GraphML:
    """Constants and type tables shared by the GraphML reader and writers."""

    NS_GRAPHML = "http://graphml.graphdrawing.org/xmlns"
    NS_XSI = "http://www.w3.org/2001/XMLSchema-instance"
    # xmlns:y="http://www.yworks.com/xml/graphml"
    NS_Y = "http://www.yworks.com/xml/graphml"
    SCHEMALOCATION = " ".join(
        [
            "http://graphml.graphdrawing.org/xmlns",
            "http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd",
        ]
    )

    def construct_types(self):
        """Build the Python <-> GraphML type tables on this instance.

        Sets ``self.xml_type`` (Python type -> GraphML type name) and
        ``self.python_type`` (GraphML type name -> Python type). Both are
        built from the same ordered list of pairs, and for a repeated key the
        last pair wins: ``int`` is therefore written as ``"long"`` and
        ``float`` as ``"double"``, while every name is read back as a
        built-in Python type.
        """
        types = [
            (int, "integer"),  # for Gephi GraphML bug
            (str, "yfiles"),
            (str, "string"),
            (int, "int"),
            (int, "long"),
            (float, "float"),
            (float, "double"),
            (bool, "boolean"),
        ]

        # These additions to types allow writing numpy types
        try:
            import numpy as np
        except ImportError:
            # numpy is optional; only a failed import is tolerated here so
            # that KeyboardInterrupt/SystemExit are no longer swallowed.
            pass
        else:
            # prepend so that python types are created upon read (last entry wins)
            types = [
                (np.float64, "float"),
                (np.float32, "float"),
                (np.float16, "float"),
                (np.int_, "int"),
                (np.int8, "int"),
                (np.int16, "int"),
                (np.int32, "int"),
                (np.int64, "int"),
                (np.uint8, "int"),
                (np.uint16, "int"),
                (np.uint32, "int"),
                (np.uint64, "int"),
                (np.intc, "int"),
                (np.intp, "int"),
            ] + types

        self.xml_type = dict(types)
        self.python_type = dict(reversed(a) for a in types)

    # This page says that data types in GraphML follow Java(TM).
    #  http://graphml.graphdrawing.org/primer/graphml-primer.html#AttributesDefinition
    # true and false are the only boolean literals:
    #  http://en.wikibooks.org/wiki/Java_Programming/Literals#Boolean_Literals
    convert_bool = {
        # We use data.lower() in actual use.
        "true": True,
        "false": False,
        # Include integer strings for convenience.
        "0": False,
        0: False,
        "1": True,
        1: True,
    }

    def get_xml_type(self, key):
        """Return the GraphML type name registered for Python type `key`.

        Wrapper around the xml_type dict that raises a more informative
        exception message when a user attempts to use data of a type not
        supported by GraphML.

        Raises
        ------
        TypeError
            If `key` is not in ``self.xml_type``.
        """
        try:
            return self.xml_type[key]
        except KeyError as err:
            raise TypeError(
                f"GraphML does not support type {key} as data values."
            ) from err

461 

462 

class GraphMLWriter(GraphML):
    """Build a GraphML document in memory with ``xml.etree.ElementTree``.

    Graphs are added with `add_graph_element` / `add_graphs`; the finished
    document is obtained with ``str(writer)`` or written with `dump`.
    """

    def __init__(
        self,
        graph=None,
        encoding="utf-8",
        prettyprint=True,
        infer_numeric_types=False,
        named_key_ids=False,
        edge_id_from_attribute=None,
    ):
        """Create the root ``<graphml>`` element and optionally add `graph`.

        Parameters
        ----------
        graph : graph, optional
            If given, it is added right away with `add_graph_element`.
        encoding : string
            Encoding used by `dump` and to decode the bytes in `__str__`.
        prettyprint : bool
            If True the tree is indented before it is serialized.
        infer_numeric_types : bool
            If True, `attr_type` generalizes over all values seen for an
            attribute name and scope instead of using each value's own type.
        named_key_ids : bool
            If True, ``<key>`` ids are the attribute names instead of
            generated ``d<N>`` ids.
        edge_id_from_attribute : dict key, optional
            Edge data key whose value is used as the edge ``id``.
        """
        self.construct_types()
        from xml.etree.ElementTree import Element

        self.myElement = Element

        self.infer_numeric_types = infer_numeric_types
        self.prettyprint = prettyprint
        self.named_key_ids = named_key_ids
        self.edge_id_from_attribute = edge_id_from_attribute
        self.encoding = encoding
        self.xml = self.myElement(
            "graphml",
            {
                "xmlns": self.NS_GRAPHML,
                "xmlns:xsi": self.NS_XSI,
                "xsi:schemaLocation": self.SCHEMALOCATION,
            },
        )
        # (name, xml type, scope) -> id of the <key> element declaring it
        self.keys = {}
        # XML element -> pending [name, value, scope, default] entries
        self.attributes = defaultdict(list)
        # (str(name), scope) -> set of Python types seen for that attribute
        self.attribute_types = defaultdict(set)

        if graph is not None:
            self.add_graph_element(graph)

    def __str__(self):
        """Return the document as text, indenting it first if requested."""
        from xml.etree.ElementTree import tostring

        if self.prettyprint:
            self.indent(self.xml)
        s = tostring(self.xml).decode(self.encoding)
        return s

    def attr_type(self, name, scope, value):
        """Infer the attribute type of data named name. Currently this only
        supports inference of numeric types.

        If self.infer_numeric_types is false, the type of `value` is used.
        Otherwise, pick the most general of types found across all values
        with name and scope. This means edges with data named 'weight' are
        treated separately from nodes with data named 'weight'.
        """
        if not self.infer_numeric_types:
            return type(value)

        # Types are recorded under the stringified name (see add_attributes),
        # so look them up the same way; a raw non-string key used to miss
        # the record and fail with an IndexError on the empty set.
        seen = self.attribute_types.get((str(name), scope))
        if not seen:
            return type(value)
        if len(seen) == 1:
            return next(iter(seen))

        xml_names = {self.get_xml_type(t) for t in seen}
        if "string" in xml_names:
            return str
        if "float" in xml_names or "double" in xml_names:
            return float
        return int

    def get_key(self, name, attr_type, scope, default):
        """Return the id of the ``<key>`` declaring (name, attr_type, scope).

        On first use a new ``<key>`` element is created (with a ``<default>``
        child when `default` is not None), inserted at the front of
        ``self.xml`` and remembered in ``self.keys``.
        """
        keys_key = (name, attr_type, scope)
        try:
            return self.keys[keys_key]
        except KeyError:
            if self.named_key_ids:
                new_id = name
            else:
                new_id = f"d{len(self.keys)}"

            self.keys[keys_key] = new_id
            key_kwargs = {
                "id": new_id,
                "for": scope,
                "attr.name": name,
                "attr.type": attr_type,
            }
            key_element = self.myElement("key", **key_kwargs)
            # add subelement for data default value if present
            if default is not None:
                default_element = self.myElement("default")
                default_element.text = str(default)
                key_element.append(default_element)
            self.xml.insert(0, key_element)
        return new_id

    def add_data(self, name, element_type, value, scope="all", default=None):
        """
        Make a data element for an edge or a node. Keep a log of the
        type in the keys table.

        Raises
        ------
        NetworkXError
            If `element_type` has no GraphML type in ``self.xml_type``.
        """
        if element_type not in self.xml_type:
            raise nx.NetworkXError(
                f"GraphML writer does not support {element_type} as data values."
            )
        keyid = self.get_key(name, self.get_xml_type(element_type), scope, default)
        data_element = self.myElement("data", key=keyid)
        data_element.text = str(value)
        return data_element

    def add_attributes(self, scope, xml_obj, data, default):
        """Appends attribute data to edges or nodes, and stores type information
        to be added later. See add_graph_element.
        """
        for k, v in data.items():
            self.attribute_types[(str(k), scope)].add(type(v))
            self.attributes[xml_obj].append([k, v, scope, default.get(k)])

    def add_nodes(self, G, graph_element):
        """Append one ``<node>`` element per node of `G` to `graph_element`."""
        default = G.graph.get("node_default", {})
        for node, data in G.nodes(data=True):
            node_element = self.myElement("node", id=str(node))
            self.add_attributes("node", node_element, data, default)
            graph_element.append(node_element)

    def add_edges(self, G, graph_element):
        """Append one ``<edge>`` element per edge of `G` to `graph_element`.

        The edge id comes from the ``edge_id_from_attribute`` data entry when
        that option is set and present in the edge data. Otherwise multigraph
        edges use their edge key and other edges get no id.
        """
        default = G.graph.get("edge_default", {})
        id_attr = self.edge_id_from_attribute
        if G.is_multigraph():
            for u, v, key, data in G.edges(data=True, keys=True):
                if id_attr and id_attr in data:
                    edge_id = str(data.get(id_attr))
                else:
                    edge_id = str(key)
                edge_element = self.myElement(
                    "edge", source=str(u), target=str(v), id=edge_id
                )
                # Attributes must be attached before the element is appended:
                # the lxml subclass writes the element out on append.
                self.add_attributes("edge", edge_element, data, default)
                graph_element.append(edge_element)
        else:
            for u, v, data in G.edges(data=True):
                xml_attrs = {"source": str(u), "target": str(v)}
                if id_attr and id_attr in data:
                    # select attribute to be edge id
                    xml_attrs["id"] = str(data.get(id_attr))
                # default: no edge id
                edge_element = self.myElement("edge", **xml_attrs)
                self.add_attributes("edge", edge_element, data, default)
                graph_element.append(edge_element)

    def add_graph_element(self, G):
        """
        Serialize graph G in GraphML and append it to the document.

        ``G.graph["id"]``, when present and not None, becomes the ``id`` of
        the ``<graph>`` element and is not written as a data attribute. `G`
        itself is left unmodified.
        """
        if G.is_directed():
            default_edge_type = "directed"
        else:
            default_edge_type = "undirected"

        # Read the id instead of popping it: popping removed the entry from
        # the caller's graph as a side effect of writing it.
        graphid = G.graph.get("id")
        if graphid is None:
            graph_element = self.myElement("graph", edgedefault=default_edge_type)
        else:
            graph_element = self.myElement(
                "graph", edgedefault=default_edge_type, id=str(graphid)
            )
        default = {}
        data = {
            k: v
            for (k, v) in G.graph.items()
            if k not in ("id", "node_default", "edge_default")
        }
        self.add_attributes("graph", graph_element, data, default)
        self.add_nodes(G, graph_element)
        self.add_edges(G, graph_element)

        # self.attributes contains a mapping from XML Objects to a list of
        # data that needs to be added to them.
        # We postpone processing in order to do type inference/generalization.
        # See self.attr_type
        for xml_obj, pending in self.attributes.items():
            for k, v, scope, default in pending:
                xml_obj.append(
                    self.add_data(
                        str(k), self.attr_type(k, scope, v), str(v), scope, default
                    )
                )
        # The pending entries are now attached; forget them so that adding
        # another graph does not append the same data elements again.
        self.attributes.clear()
        self.xml.append(graph_element)

    def add_graphs(self, graph_list):
        """Add many graphs to this GraphML document."""
        for G in graph_list:
            self.add_graph_element(G)

    def dump(self, stream):
        """Write the document, with an XML declaration, to `stream`."""
        from xml.etree.ElementTree import ElementTree

        if self.prettyprint:
            self.indent(self.xml)
        document = ElementTree(self.xml)
        document.write(stream, encoding=self.encoding, xml_declaration=True)

    def indent(self, elem, level=0):
        """Indent `elem` and its descendants in place for pretty printing."""
        # NOTE(review): the indent unit below is reproduced exactly as it
        # appears in the reviewed text (one space) - confirm the width
        # against the original file before applying.
        i = "\n" + level * " "
        if len(elem):
            if not elem.text or not elem.text.strip():
                elem.text = i + " "
            if not elem.tail or not elem.tail.strip():
                elem.tail = i
            for child in elem:
                self.indent(child, level + 1)
            # `child` is now the last child: its tail closes this element,
            # so it is set back to the parent's indentation.
            if not child.tail or not child.tail.strip():
                child.tail = i
        else:
            if level and (not elem.tail or not elem.tail.strip()):
                elem.tail = i

685 

686 

class IncrementalElement:
    """Element-like facade over an incremental XML writer.

    Only the calls that GraphMLWriter makes on a container element are
    provided; this is not meant to be a complete Element implementation.
    """

    def __init__(self, xml, prettyprint):
        """Remember the incremental writer `xml` and the pretty-print flag."""
        self.xml, self.prettyprint = xml, prettyprint

    def append(self, element):
        """Write `element` through the wrapped writer instead of storing it."""
        writer = self.xml
        writer.write(element, pretty_print=self.prettyprint)

700 

701 

class GraphMLWriterLxml(GraphMLWriter):
    """GraphML writer that streams the document to a file with lxml.

    Nodes and edges are written out as they are created instead of being
    collected in one in-memory tree. `dump` closes the document.
    """

    def __init__(
        self,
        path,
        graph=None,
        encoding="utf-8",
        prettyprint=True,
        infer_numeric_types=False,
        named_key_ids=False,
        edge_id_from_attribute=None,
    ):
        """Open `path` for incremental writing and start the document.

        The XML declaration and the opening ``<graphml>`` tag are written
        immediately. If `graph` is given it is added right away. The other
        parameters have the same meaning as for `GraphMLWriter`.
        """
        self.construct_types()
        import lxml.etree as lxmletree

        self.myElement = lxmletree.Element

        self._encoding = encoding
        self._prettyprint = prettyprint
        self.named_key_ids = named_key_ids
        self.edge_id_from_attribute = edge_id_from_attribute
        self.infer_numeric_types = infer_numeric_types

        # Both context managers are entered by hand here and exited in dump().
        self._xml_base = lxmletree.xmlfile(path, encoding=encoding)
        self._xml = self._xml_base.__enter__()
        self._xml.write_declaration()

        # We need to have a xml variable that support insertion. This call is
        # used for adding the keys to the document.
        # We will store those keys in a plain list, and write them out just
        # before the graph element they were collected for.
        self.xml = []
        self._keys = self.xml
        self._graphml = self._xml.element(
            "graphml",
            {
                "xmlns": self.NS_GRAPHML,
                "xmlns:xsi": self.NS_XSI,
                "xsi:schemaLocation": self.SCHEMALOCATION,
            },
        )
        self._graphml.__enter__()
        self.keys = {}
        self.attribute_types = defaultdict(set)

        if graph is not None:
            self.add_graph_element(graph)

    def _declare_keys(self, scope, attr_dicts, defaults):
        """Record value types and create the ``<key>`` elements for `scope`."""
        # First pass: gather every type seen, so that the second pass can
        # pick the most general numeric format needed.
        for attrs in attr_dicts:
            for k, v in attrs.items():
                self.attribute_types[(str(k), scope)].add(type(v))
        # Second pass: make sure a key id exists for each attribute.
        for attrs in attr_dicts:
            for k, v in attrs.items():
                xml_type = self.get_xml_type(self.attr_type(str(k), scope, v))
                self.get_key(str(k), xml_type, scope, defaults.get(k))

    def add_graph_element(self, G):
        """
        Serialize graph G in GraphML to the stream.

        ``G.graph["id"]``, when present and not None, becomes the ``id`` of
        the ``<graph>`` element and is not written as a data attribute. `G`
        itself is left unmodified.
        """
        if G.is_directed():
            default_edge_type = "directed"
        else:
            default_edge_type = "undirected"

        # Read the id instead of popping it: popping removed the entry from
        # the caller's graph as a side effect of writing it.
        graphid = G.graph.get("id")
        if graphid is None:
            graph_element = self._xml.element("graph", edgedefault=default_edge_type)
        else:
            graph_element = self._xml.element(
                "graph", edgedefault=default_edge_type, id=str(graphid)
            )

        graphdata = {
            k: v
            for k, v in G.graph.items()
            if k not in ("id", "node_default", "edge_default")
        }
        node_default = G.graph.get("node_default", {})
        edge_default = G.graph.get("edge_default", {})

        # Keys are declared in the order graph, node, edge. Graph attributes
        # have no defaults, hence the empty mapping.
        self._declare_keys("graph", [graphdata], {})
        self._declare_keys("node", [d for _, d in G.nodes(data=True)], node_default)
        # With data=True every edge, parallel ones included, yields its data
        # dict last, so one pass covers simple graphs and multigraphs alike.
        self._declare_keys("edge", [e[-1] for e in G.edges(data=True)], edge_default)

        # Now add attribute keys to the xml file
        for key in self.xml:
            self._xml.write(key, pretty_print=self._prettyprint)
        # These keys are on disk now; drop them (in place, `_keys` aliases
        # the same list) so a later graph does not write them a second time.
        self.xml.clear()

        # The incremental_writer writes each node/edge as it is created
        incremental_writer = IncrementalElement(self._xml, self._prettyprint)
        with graph_element:
            self.add_attributes("graph", incremental_writer, graphdata, {})
            self.add_nodes(G, incremental_writer)  # adds attributes too
            self.add_edges(G, incremental_writer)  # adds attributes too

    def add_attributes(self, scope, xml_obj, data, default):
        """Appends attribute data."""
        for k, v in data.items():
            data_element = self.add_data(
                str(k), self.attr_type(str(k), scope, v), str(v), scope, default.get(k)
            )
            xml_obj.append(data_element)

    def __str__(self):
        """Return the default object representation.

        The parent's ``__str__`` serializes an in-memory tree, which this
        streaming writer does not keep.
        """
        return object.__str__(self)

    def dump(self, stream=None):
        """Close the ``<graphml>`` element and the underlying file writer.

        `stream` is accepted for signature compatibility with the parent
        class and is ignored; output goes to the path given at construction.
        """
        self._graphml.__exit__(None, None, None)
        self._xml_base.__exit__(None, None, None)

833 

834 

# Default writer: `write_graphml_lxml` uses lxml when it can be imported and
# falls back to `write_graphml_xml` otherwise.
write_graphml = write_graphml_lxml

837 

838 

839class GraphMLReader(GraphML): 

840 """Read a GraphML document. Produces NetworkX graph objects.""" 

841 

842 def __init__(self, node_type=str, edge_key_type=int, force_multigraph=False): 

843 self.construct_types() 

844 self.node_type = node_type 

845 self.edge_key_type = edge_key_type 

846 self.multigraph = force_multigraph # If False, test for multiedges 

847 self.edge_ids = {} # dict mapping (u,v) tuples to edge id attributes 

848 

849 def __call__(self, path=None, string=None): 

850 from xml.etree.ElementTree import ElementTree, fromstring 

851 

852 if path is not None: 

853 self.xml = ElementTree(file=path) 

854 elif string is not None: 

855 self.xml = fromstring(string) 

856 else: 

857 raise ValueError("Must specify either 'path' or 'string' as kwarg") 

858 (keys, defaults) = self.find_graphml_keys(self.xml) 

859 for g in self.xml.findall(f"{{{self.NS_GRAPHML}}}graph"): 

860 yield self.make_graph(g, keys, defaults) 

861 

862 def make_graph(self, graph_xml, graphml_keys, defaults, G=None): 

863 # set default graph type 

864 edgedefault = graph_xml.get("edgedefault", None) 

865 if G is None: 

866 if edgedefault == "directed": 

867 G = nx.MultiDiGraph() 

868 else: 

869 G = nx.MultiGraph() 

870 # set defaults for graph attributes 

871 G.graph["node_default"] = {} 

872 G.graph["edge_default"] = {} 

873 for key_id, value in defaults.items(): 

874 key_for = graphml_keys[key_id]["for"] 

875 name = graphml_keys[key_id]["name"] 

876 python_type = graphml_keys[key_id]["type"] 

877 if key_for == "node": 

878 G.graph["node_default"].update({name: python_type(value)}) 

879 if key_for == "edge": 

880 G.graph["edge_default"].update({name: python_type(value)}) 

881 # hyperedges are not supported 

882 hyperedge = graph_xml.find(f"{{{self.NS_GRAPHML}}}hyperedge") 

883 if hyperedge is not None: 

884 raise nx.NetworkXError("GraphML reader doesn't support hyperedges") 

885 # add nodes 

886 for node_xml in graph_xml.findall(f"{{{self.NS_GRAPHML}}}node"): 

887 self.add_node(G, node_xml, graphml_keys, defaults) 

888 # add edges 

889 for edge_xml in graph_xml.findall(f"{{{self.NS_GRAPHML}}}edge"): 

890 self.add_edge(G, edge_xml, graphml_keys) 

891 # add graph data 

892 data = self.decode_data_elements(graphml_keys, graph_xml) 

893 G.graph.update(data) 

894 

895 # switch to Graph or DiGraph if no parallel edges were found 

896 if self.multigraph: 

897 return G 

898 

899 G = nx.DiGraph(G) if G.is_directed() else nx.Graph(G) 

900 # add explicit edge "id" from file as attribute in NX graph. 

901 nx.set_edge_attributes(G, values=self.edge_ids, name="id") 

902 return G 

903 

def add_node(self, G, node_xml, graphml_keys, defaults):
    """Add the node described by ``node_xml`` to ``G``.

    Parameters
    ----------
    G : graph
        Graph being populated; the node is inserted with ``G.add_node``.
    node_xml : xml.etree.ElementTree.Element
        The GraphML ``<node>`` element to read.
    graphml_keys : dict
        Key declarations used to decode the node's ``<data>`` children.
    defaults : dict
        Key defaults; only forwarded to ``make_graph`` for nested graphs.

    Notes
    -----
    A ``<port>`` child only triggers a warning because ports are not
    supported.  For a yEd group node (``yfiles.foldertype="group"``) the
    nested ``<graph>`` element is merged into ``G`` through ``make_graph``.
    A group node that carries no nested ``<graph>`` is kept as an ordinary
    node: ``make_graph`` dereferences its ``graph_xml`` argument
    unconditionally, so passing ``None`` would fail with an
    ``AttributeError``.
    """
    # warn on finding unsupported ports tag
    ports = node_xml.find(f"{{{self.NS_GRAPHML}}}port")
    if ports is not None:
        warnings.warn("GraphML port tag not supported.")
    # find the node by id and cast it to the appropriate type
    node_id = self.node_type(node_xml.get("id"))
    # get data/attributes for node
    data = self.decode_data_elements(graphml_keys, node_xml)
    G.add_node(node_id, **data)
    # get child nodes
    if node_xml.attrib.get("yfiles.foldertype") == "group":
        graph_xml = node_xml.find(f"{{{self.NS_GRAPHML}}}graph")
        # An empty group has no nested graph; there is nothing to merge.
        if graph_xml is not None:
            self.make_graph(graph_xml, graphml_keys, defaults, G)

919 

def add_edge(self, G, edge_element, graphml_keys):
    """Add the edge described by ``edge_element`` to ``G``.

    Parameters
    ----------
    G : graph
        Graph being populated; must provide ``is_directed``, ``has_edge``
        and ``add_edges_from``.
    edge_element : xml.etree.ElementTree.Element
        The GraphML ``<edge>`` element to read.
    graphml_keys : dict
        Key declarations used to decode the edge's ``<data>`` children.

    Raises
    ------
    NetworkXError
        If the edge's ``directed`` attribute contradicts the direction of
        ``G``, or if the ``source`` or ``target`` attribute is missing.

    Notes
    -----
    The edge key is the ``id`` attribute converted with
    ``self.edge_key_type`` (left as the raw string when conversion raises
    ``ValueError``).  When the ``id`` is absent or empty, the decoded
    ``"key"`` data value is used instead.  Finding an already existing
    ``(source, target)`` pair flags the reader as a multigraph.
    """
    # warn on finding unsupported ports tag
    ports = edge_element.find(f"{{{self.NS_GRAPHML}}}port")
    if ports is not None:
        warnings.warn("GraphML port tag not supported.")

    # raise error if we find mixed directed and undirected edges
    directed = edge_element.get("directed")
    if G.is_directed() and directed == "false":
        msg = "directed=false edge found in directed graph."
        raise nx.NetworkXError(msg)
    if (not G.is_directed()) and directed == "true":
        msg = "directed=true edge found in undirected graph."
        raise nx.NetworkXError(msg)

    # Both endpoints are mandatory.  Converting a missing attribute (None)
    # would either create a bogus "None" node (str node type) or fail with
    # an unrelated TypeError (int node type), so reject it up front and
    # before any reader state is touched.
    raw_source = edge_element.get("source")
    raw_target = edge_element.get("target")
    if raw_source is None or raw_target is None:
        msg = "GraphML edge is missing its source or target attribute."
        raise nx.NetworkXError(msg)

    source = self.node_type(raw_source)
    target = self.node_type(raw_target)
    data = self.decode_data_elements(graphml_keys, edge_element)
    # GraphML stores edge ids as an attribute
    # NetworkX uses them as keys in multigraphs too if no key
    # attribute is specified
    edge_id = edge_element.get("id")
    if edge_id:
        # self.edge_ids is used by `make_graph` method for non-multigraphs
        self.edge_ids[source, target] = edge_id
        try:
            edge_id = self.edge_key_type(edge_id)
        except ValueError:  # Could not convert.
            pass
    else:
        edge_id = data.get("key")

    if G.has_edge(source, target):
        # mark this as a multigraph
        self.multigraph = True

    # Use add_edges_from to avoid error with add_edge when `'key' in data`
    # Note there is only one edge here...
    G.add_edges_from([(source, target, edge_id, data)])

960 

def decode_data_elements(self, graphml_keys, obj_xml):
    """Use the key information to decode the data XML if present.

    Parameters
    ----------
    graphml_keys : dict
        Maps a key id to a dict holding at least ``"name"`` (attribute
        name) and ``"type"`` (callable converting text to a value).
    obj_xml : xml.etree.ElementTree.Element
        Element whose direct ``<data>`` children are decoded.

    Returns
    -------
    dict
        Attribute name -> decoded value.  A ``<data>`` element without text
        and without children yields ``""``.  A ``<data>`` element with
        children is treated as a yFiles extension and may contribute
        ``"x"``, ``"y"``, ``"shape_type"`` and ``"label"`` entries.

    Raises
    ------
    NetworkXError
        If a ``<data>`` element references a key id missing from
        ``graphml_keys``.

    Notes
    -----
    Boolean text is stripped of surrounding whitespace before lookup so
    that pretty-printed values such as ``"\\n  true\\n"`` decode like the
    numeric types, whose constructors already ignore such padding.
    """
    data = {}
    for data_element in obj_xml.findall(f"{{{self.NS_GRAPHML}}}data"):
        key = data_element.get("key")
        try:
            data_name = graphml_keys[key]["name"]
            data_type = graphml_keys[key]["type"]
        except KeyError as err:
            raise nx.NetworkXError(f"Bad GraphML data: no key {key}") from err
        text = data_element.text
        # len() of an Element is its number of direct children.
        has_children = len(data_element) > 0
        # assume anything with subelements is a yfiles extension
        if text is not None and not has_children:
            if data_type is bool:
                # Ignore cases.
                # http://docs.oracle.com/javase/6/docs/api/java/lang/
                # Boolean.html#parseBoolean%28java.lang.String%29
                data[data_name] = self.convert_bool[text.strip().lower()]
            else:
                data[data_name] = data_type(text)
        elif has_children:
            # Assume yfiles as subelements, try to extract node_label
            node_label = None
            # set GenericNode's configuration as shape type
            gn = data_element.find(f"{{{self.NS_Y}}}GenericNode")
            if gn is not None:
                data["shape_type"] = gn.get("configuration")
            for node_type in ["GenericNode", "ShapeNode", "SVGNode", "ImageNode"]:
                pref = f"{{{self.NS_Y}}}{node_type}/{{{self.NS_Y}}}"
                geometry = data_element.find(f"{pref}Geometry")
                if geometry is not None:
                    data["x"] = geometry.get("x")
                    data["y"] = geometry.get("y")
                # keep the first node label found across the node kinds
                if node_label is None:
                    node_label = data_element.find(f"{pref}NodeLabel")
                shape = data_element.find(f"{pref}Shape")
                if shape is not None:
                    data["shape_type"] = shape.get("type")
            if node_label is not None:
                data["label"] = node_label.text

            # check all the different types of edges available in yEd.
            for edge_type in [
                "PolyLineEdge",
                "SplineEdge",
                "QuadCurveEdge",
                "BezierEdge",
                "ArcEdge",
            ]:
                pref = f"{{{self.NS_Y}}}{edge_type}/{{{self.NS_Y}}}"
                edge_label = data_element.find(f"{pref}EdgeLabel")
                if edge_label is not None:
                    break
            if edge_label is not None:
                data["label"] = edge_label.text
        else:
            # no text and no children: empty data element
            data[data_name] = ""
    return data

1019 

def find_graphml_keys(self, graph_element):
    """Extracts all the keys and key defaults from the xml.

    Parameters
    ----------
    graph_element : xml.etree.ElementTree.Element
        Element whose direct ``<key>`` children are read.

    Returns
    -------
    graphml_keys : dict
        Key id -> ``{"name": ..., "type": ..., "for": ...}`` where
        ``"type"`` is looked up in ``self.python_type``.
    graphml_key_defaults : dict
        Key id -> converted value of the key's ``<default>`` child, for
        keys that declare a usable default.

    Raises
    ------
    NetworkXError
        If a key has neither ``attr.name`` nor ``yfiles.type``.

    Notes
    -----
    A key without ``attr.type`` is treated as ``"string"`` with a warning.
    An empty ``<default/>`` has no text: for string keys it becomes ``""``
    (rather than the literal ``"None"`` that ``str(None)`` would give); for
    any other type there is no value to convert, so a warning is issued and
    no default is recorded.  Boolean defaults are matched case-insensitively
    after stripping surrounding whitespace.
    """
    graphml_keys = {}
    graphml_key_defaults = {}
    for k in graph_element.findall(f"{{{self.NS_GRAPHML}}}key"):
        attr_id = k.get("id")
        attr_type = k.get("attr.type")
        attr_name = k.get("attr.name")
        yfiles_type = k.get("yfiles.type")
        if yfiles_type is not None:
            attr_name = yfiles_type
            attr_type = "yfiles"
        if attr_type is None:
            attr_type = "string"
            warnings.warn(f"No key type for id {attr_id}. Using string")
        if attr_name is None:
            raise nx.NetworkXError(f"Unknown key for id {attr_id}.")
        python_type = self.python_type[attr_type]
        graphml_keys[attr_id] = {
            "name": attr_name,
            "type": python_type,
            "for": k.get("for"),
        }
        # check for "default" sub-element of key element
        default = k.find(f"{{{self.NS_GRAPHML}}}default")
        if default is None:
            continue
        default_text = default.text
        if default_text is None:
            # Empty <default/>: nothing to convert.
            if python_type is str:
                graphml_key_defaults[attr_id] = ""
            else:
                warnings.warn(f"Empty default for key id {attr_id} ignored.")
        elif python_type is bool:
            graphml_key_defaults[attr_id] = self.convert_bool[
                default_text.strip().lower()
            ]
        else:
            graphml_key_defaults[attr_id] = python_type(default_text)
    return graphml_keys, graphml_key_defaults