1"""
2*******
3GraphML
4*******
5Read and write graphs in GraphML format.
6
7.. warning::
8
9 This parser uses the standard xml library present in Python, which is
10 insecure - see :external+python:mod:`xml` for additional information.
11 Only parse GraphML files you trust.
12
This implementation does not support mixed graphs (directed and undirected
edges together), hyperedges, nested graphs, or ports.
15
16"GraphML is a comprehensive and easy-to-use file format for graphs. It
17consists of a language core to describe the structural properties of a
18graph and a flexible extension mechanism to add application-specific
19data. Its main features include support of
20
21 * directed, undirected, and mixed graphs,
22 * hypergraphs,
23 * hierarchical graphs,
24 * graphical representations,
25 * references to external data,
26 * application-specific attribute data, and
27 * light-weight parsers.
28
29Unlike many other file formats for graphs, GraphML does not use a
30custom syntax. Instead, it is based on XML and hence ideally suited as
31a common denominator for all kinds of services generating, archiving,
32or processing graphs."
33
34http://graphml.graphdrawing.org/
35
36Format
37------
38GraphML is an XML format. See
39http://graphml.graphdrawing.org/specification.html for the specification and
40http://graphml.graphdrawing.org/primer/graphml-primer.html
41for examples.
42"""
43
44import warnings
45from collections import defaultdict
46
47import networkx as nx
48from networkx.utils import open_file
49
# Names exported by ``from <module> import *``. GraphMLWriterLxml and
# IncrementalElement are defined in this module but are not exported here.
__all__ = [
    "write_graphml",
    "read_graphml",
    "generate_graphml",
    "write_graphml_xml",
    "write_graphml_lxml",
    "parse_graphml",
    "GraphMLWriter",
    "GraphMLReader",
]
60
61
@open_file(1, mode="wb")
def write_graphml_xml(
    G,
    path,
    encoding="utf-8",
    prettyprint=True,
    infer_numeric_types=False,
    named_key_ids=False,
    edge_id_from_attribute=None,
):
    """Write G in GraphML XML format to path

    The document is built in memory by `GraphMLWriter` and then written
    to `path` in one step.

    Parameters
    ----------
    G : graph
        A networkx graph
    path : file or string
        File or filename to write.
        Filenames ending in .gz or .bz2 will be compressed.
    encoding : string (optional)
        Encoding for text data.
    prettyprint : bool (optional)
        If True use line breaks and indenting in output XML.
    infer_numeric_types : boolean
        Determine if numeric types should be generalized.
        For example, if edges have both int and float 'weight' attributes,
        we infer in GraphML that both are floats.
    named_key_ids : bool (optional)
        If True use attr.name as value for key elements' id attribute.
    edge_id_from_attribute : dict key (optional)
        If provided, the graphml edge id is set by looking up the
        corresponding edge data attribute keyed by this parameter. If `None`
        or the key does not exist in edge data, the edge id is set by the
        edge key if `G` is a MultiGraph, else the edge id is left unset.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> nx.write_graphml(G, "test.graphml")

    Notes
    -----
    This implementation does not support mixed graphs (directed
    and undirected edges together), hyperedges, nested graphs, or ports.
    """
    options = {
        "encoding": encoding,
        "prettyprint": prettyprint,
        "infer_numeric_types": infer_numeric_types,
        "named_key_ids": named_key_ids,
        "edge_id_from_attribute": edge_id_from_attribute,
    }
    graphml_writer = GraphMLWriter(**options)
    graphml_writer.add_graph_element(G)
    graphml_writer.dump(path)
115
116
@open_file(1, mode="wb")
def write_graphml_lxml(
    G,
    path,
    encoding="utf-8",
    prettyprint=True,
    infer_numeric_types=False,
    named_key_ids=False,
    edge_id_from_attribute=None,
):
    """Write G in GraphML XML format to path

    This function uses the LXML framework and should be faster than
    the version using the xml library. When lxml cannot be imported the
    call is forwarded to `write_graphml_xml` with the same arguments.

    Parameters
    ----------
    G : graph
        A networkx graph
    path : file or string
        File or filename to write.
        Filenames ending in .gz or .bz2 will be compressed.
    encoding : string (optional)
        Encoding for text data.
    prettyprint : bool (optional)
        If True use line breaks and indenting in output XML.
    infer_numeric_types : boolean
        Determine if numeric types should be generalized.
        For example, if edges have both int and float 'weight' attributes,
        we infer in GraphML that both are floats.
    named_key_ids : bool (optional)
        If True use attr.name as value for key elements' id attribute.
    edge_id_from_attribute : dict key (optional)
        If provided, the graphml edge id is set by looking up the
        corresponding edge data attribute keyed by this parameter. If `None`
        or the key does not exist in edge data, the edge id is set by the
        edge key if `G` is a MultiGraph, else the edge id is left unset.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> nx.write_graphml_lxml(G, "fourpath.graphml")

    Notes
    -----
    This implementation does not support mixed graphs (directed
    and undirected edges together), hyperedges, nested graphs, or ports.
    """
    try:
        # Imported only to find out whether lxml is available; the name
        # itself is not used in this function.
        import lxml.etree as lxmletree
    except ImportError:
        return write_graphml_xml(
            G,
            path,
            encoding,
            prettyprint,
            infer_numeric_types,
            named_key_ids,
            edge_id_from_attribute,
        )

    options = {
        "encoding": encoding,
        "prettyprint": prettyprint,
        "infer_numeric_types": infer_numeric_types,
        "named_key_ids": named_key_ids,
        "edge_id_from_attribute": edge_id_from_attribute,
    }
    # The constructor already streams the graph; dump() closes the document.
    lxml_writer = GraphMLWriterLxml(path, graph=G, **options)
    lxml_writer.dump()
187
188
def generate_graphml(
    G,
    encoding="utf-8",
    prettyprint=True,
    named_key_ids=False,
    edge_id_from_attribute=None,
    infer_numeric_types=False,
):
    """Generate GraphML lines for G

    Parameters
    ----------
    G : graph
        A networkx graph
    encoding : string (optional)
        Encoding for text data.
    prettyprint : bool (optional)
        If True use line breaks and indenting in output XML.
    named_key_ids : bool (optional)
        If True use attr.name as value for key elements' id attribute.
    edge_id_from_attribute : dict key (optional)
        If provided, the graphml edge id is set by looking up the
        corresponding edge data attribute keyed by this parameter. If `None`
        or the key does not exist in edge data, the edge id is set by the
        edge key if `G` is a MultiGraph, else the edge id is left unset.
    infer_numeric_types : boolean (optional)
        Determine if numeric types should be generalized, exactly as in
        `write_graphml_xml`. For example, if edges have both int and float
        'weight' attributes, we infer in GraphML that both are floats.
        Placed last so existing positional calls keep their meaning.

    Yields
    ------
    line : string
        One line of the serialized GraphML document, without its line ending.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> linefeed = chr(10)  # linefeed = \n
    >>> s = linefeed.join(nx.generate_graphml(G))
    >>> for line in nx.generate_graphml(G):  # doctest: +SKIP
    ...     print(line)

    Notes
    -----
    This implementation does not support mixed graphs (directed and undirected
    edges together), hyperedges, nested graphs, or ports.
    """
    writer = GraphMLWriter(
        encoding=encoding,
        prettyprint=prettyprint,
        infer_numeric_types=infer_numeric_types,
        named_key_ids=named_key_ids,
        edge_id_from_attribute=edge_id_from_attribute,
    )
    writer.add_graph_element(G)
    yield from str(writer).splitlines()
234
235
@open_file(0, mode="rb")
@nx._dispatchable(graphs=None, returns_graph=True)
def read_graphml(path, node_type=str, edge_key_type=int, force_multigraph=False):
    """Read graph in GraphML format from path.

    Parameters
    ----------
    path : file or string
        Filename or file handle to read.
        Filenames ending in .gz or .bz2 will be decompressed.

    node_type: Python type (default: str)
        Convert node ids to this type

    edge_key_type: Python type (default: int)
        Convert graphml edge ids to this type. Multigraphs use id as edge key.
        Non-multigraphs add to edge attribute dict with name "id".

    force_multigraph : bool (default: False)
        If True, return a multigraph with edge keys. If False (the default)
        return a multigraph when multiedges are in the graph.

    Returns
    -------
    graph: NetworkX graph
        If parallel edges are present or `force_multigraph=True` then
        a MultiGraph or MultiDiGraph is returned. Otherwise a Graph/DiGraph.
        The returned graph is directed if the file indicates it should be.
        Only the first graph found in the document is returned.

    Raises
    ------
    NetworkXError
        If no graph can be read, even after retrying with the GraphML
        namespace added to a bare ``<graphml>`` root tag.

    Notes
    -----
    Default node and edge attributes are not propagated to each node and edge.
    They can be obtained from `G.graph` and applied to node and edge attributes
    if desired using something like this:

    >>> default_color = G.graph["node_default"]["color"]  # doctest: +SKIP
    >>> for node, data in G.nodes(data=True):  # doctest: +SKIP
    ...     if "color" not in data:
    ...         data["color"] = default_color
    >>> default_color = G.graph["edge_default"]["color"]  # doctest: +SKIP
    >>> for u, v, data in G.edges(data=True):  # doctest: +SKIP
    ...     if "color" not in data:
    ...         data["color"] = default_color

    This implementation does not support mixed graphs (directed and undirected
    edges together), hypergraphs, nested graphs, or ports.

    For multigraphs the GraphML edge "id" will be used as the edge
    key. If not specified then the "key" attribute will be used. If
    there is no "key" attribute a default NetworkX multigraph edge key
    will be provided.

    Files with the yEd "yfiles" extension can be read. The type of the node's
    shape is preserved in the `shape_type` node attribute.

    yEd compressed files ("file.graphmlz" extension) can be read by renaming
    the file to "file.graphml.gz".

    """
    graphml_reader = GraphMLReader(node_type, edge_key_type, force_multigraph)
    # The document may hold several graphs; collect them all first.
    graphs = list(graphml_reader(path=path))
    if not graphs:
        # Nothing matched the namespaced lookup: retry after giving a bare
        # <graphml> root tag the GraphML namespace.
        path.seek(0)
        namespaced_root = b'<graphml xmlns="http://graphml.graphdrawing.org/xmlns">'
        patched = path.read().replace(b"<graphml>", namespaced_root)
        graphs = list(graphml_reader(string=patched))
        if not graphs:
            raise nx.NetworkXError("file not successfully read as graphml")
    return graphs[0]
308
309
@nx._dispatchable(graphs=None, returns_graph=True)
def parse_graphml(
    graphml_string, node_type=str, edge_key_type=int, force_multigraph=False
):
    """Read graph in GraphML format from string.

    Parameters
    ----------
    graphml_string : string
        String containing graphml information
        (e.g., contents of a graphml file).

    node_type: Python type (default: str)
        Convert node ids to this type

    edge_key_type: Python type (default: int)
        Convert graphml edge ids to this type. Multigraphs use id as edge key.
        Non-multigraphs add to edge attribute dict with name "id".

    force_multigraph : bool (default: False)
        If True, return a multigraph with edge keys. If False (the default)
        return a multigraph when multiedges are in the graph.


    Returns
    -------
    graph: NetworkX graph
        If no parallel edges are found a Graph or DiGraph is returned.
        Otherwise a MultiGraph or MultiDiGraph is returned.
        Only the first graph found in the document is returned.

    Raises
    ------
    NetworkXError
        If no graph can be read, even after retrying with the GraphML
        namespace added to a bare ``<graphml>`` root tag.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> linefeed = chr(10)  # linefeed = \n
    >>> s = linefeed.join(nx.generate_graphml(G))
    >>> H = nx.parse_graphml(s)

    Notes
    -----
    Default node and edge attributes are not propagated to each node and edge.
    They can be obtained from `G.graph` and applied to node and edge attributes
    if desired using something like this:

    >>> default_color = G.graph["node_default"]["color"]  # doctest: +SKIP
    >>> for node, data in G.nodes(data=True):  # doctest: +SKIP
    ...     if "color" not in data:
    ...         data["color"] = default_color
    >>> default_color = G.graph["edge_default"]["color"]  # doctest: +SKIP
    >>> for u, v, data in G.edges(data=True):  # doctest: +SKIP
    ...     if "color" not in data:
    ...         data["color"] = default_color

    This implementation does not support mixed graphs (directed and undirected
    edges together), hypergraphs, nested graphs, or ports.

    For multigraphs the GraphML edge "id" will be used as the edge
    key. If not specified then the "key" attribute will be used. If
    there is no "key" attribute a default NetworkX multigraph edge key
    will be provided.

    """
    graphml_reader = GraphMLReader(node_type, edge_key_type, force_multigraph)
    # The document may hold several graphs; collect them all first.
    graphs = list(graphml_reader(string=graphml_string))
    if not graphs:
        # Nothing matched the namespaced lookup: retry after giving a bare
        # <graphml> root tag the GraphML namespace.
        namespaced_root = '<graphml xmlns="http://graphml.graphdrawing.org/xmlns">'
        patched = graphml_string.replace("<graphml>", namespaced_root)
        graphs = list(graphml_reader(string=patched))
        if not graphs:
            raise nx.NetworkXError("file not successfully read as graphml")
    return graphs[0]
382
383
class GraphML:
    """Constants and type tables shared by the GraphML reader and writers.

    `construct_types` must be called before `xml_type`, `python_type` or
    `get_xml_type` are used; it creates the two lookup dicts on the instance.
    """

    NS_GRAPHML = "http://graphml.graphdrawing.org/xmlns"
    NS_XSI = "http://www.w3.org/2001/XMLSchema-instance"
    # xmlns:y="http://www.yworks.com/xml/graphml"
    NS_Y = "http://www.yworks.com/xml/graphml"
    SCHEMALOCATION = " ".join(
        [
            "http://graphml.graphdrawing.org/xmlns",
            "http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd",
        ]
    )

    def construct_types(self):
        """Build the Python-type <-> GraphML-type lookup tables.

        Sets ``self.xml_type`` (Python type -> GraphML ``attr.type`` name) and
        ``self.python_type`` (GraphML ``attr.type`` name -> Python type).
        Both are built from one ordered list of pairs, so when a key occurs
        more than once the last pair wins.
        """
        types = [
            (int, "integer"),  # for Gephi GraphML bug
            (str, "yfiles"),
            (str, "string"),
            (int, "int"),
            (int, "long"),
            (float, "float"),
            (float, "double"),
            (bool, "boolean"),
        ]

        # These additions to types allow writing numpy types
        try:
            import numpy as np
        except ImportError:
            # numpy is optional; without it only builtin types are supported.
            pass
        else:
            # prepend so that python types are created upon read (last entry wins)
            types = [
                (np.float64, "float"),
                (np.float32, "float"),
                (np.float16, "float"),
                (np.int_, "int"),
                (np.int8, "int"),
                (np.int16, "int"),
                (np.int32, "int"),
                (np.int64, "int"),
                (np.uint8, "int"),
                (np.uint16, "int"),
                (np.uint32, "int"),
                (np.uint64, "int"),
                (np.intc, "int"),
                (np.intp, "int"),
            ] + types

        self.xml_type = dict(types)
        self.python_type = {xml_name: py_type for py_type, xml_name in types}

    # This page says that data types in GraphML follow Java(TM).
    #  http://graphml.graphdrawing.org/primer/graphml-primer.html#AttributesDefinition
    # true and false are the only boolean literals:
    #  http://en.wikibooks.org/wiki/Java_Programming/Literals#Boolean_Literals
    convert_bool = {
        # We use data.lower() in actual use.
        "true": True,
        "false": False,
        # Include integer strings for convenience.
        "0": False,
        0: False,
        "1": True,
        1: True,
    }

    def get_xml_type(self, key):
        """Return the GraphML type name registered for the Python type `key`.

        Wrapper around the xml_type dict that raises a more informative
        exception message when a user attempts to use data of a type not
        supported by GraphML.

        Raises
        ------
        TypeError
            If `key` has no entry in ``self.xml_type``.
        """
        try:
            return self.xml_type[key]
        except KeyError as err:
            raise TypeError(
                f"GraphML does not support type {key} as data values."
            ) from err
461
462
class GraphMLWriter(GraphML):
    """Build a GraphML document in memory with ``xml.etree.ElementTree``.

    Graphs are added with `add_graph_element` / `add_graphs`; the finished
    document is obtained with ``str(writer)`` or written with `dump`.
    """

    def __init__(
        self,
        graph=None,
        encoding="utf-8",
        prettyprint=True,
        infer_numeric_types=False,
        named_key_ids=False,
        edge_id_from_attribute=None,
    ):
        """Create the root ``<graphml>`` element and optionally add `graph`.

        Parameters
        ----------
        graph : graph, optional
            If given, it is serialized immediately via `add_graph_element`.
        encoding : string
            Encoding passed to ``ElementTree.write`` by `dump`.
        prettyprint : bool
            If True, `indent` is applied before output.
        infer_numeric_types : bool
            If True, `attr_type` generalizes mixed types per (name, scope).
        named_key_ids : bool
            If True, ``<key>`` ids are the attribute names instead of ``d<N>``.
        edge_id_from_attribute : dict key, optional
            Edge data key whose value becomes the edge ``id``.
        """
        self.construct_types()
        from xml.etree.ElementTree import Element

        self.myElement = Element

        self.infer_numeric_types = infer_numeric_types
        self.prettyprint = prettyprint
        self.named_key_ids = named_key_ids
        self.edge_id_from_attribute = edge_id_from_attribute
        self.encoding = encoding
        self.xml = self.myElement(
            "graphml",
            {
                "xmlns": self.NS_GRAPHML,
                "xmlns:xsi": self.NS_XSI,
                "xsi:schemaLocation": self.SCHEMALOCATION,
            },
        )
        # (name, xml type, scope) -> id of the <key> element already created
        self.keys = {}
        # xml element -> [name, value, scope, default] entries awaiting output
        self.attributes = defaultdict(list)
        # (str(name), scope) -> set of Python types seen for that attribute
        self.attribute_types = defaultdict(set)

        if graph is not None:
            self.add_graph_element(graph)

    def __str__(self):
        """Return the document as text (indented first if prettyprint)."""
        from xml.etree.ElementTree import tostring

        if self.prettyprint:
            self.indent(self.xml)
        # tostring() without an encoding argument returns us-ascii bytes, so
        # they must be decoded as ascii. Decoding with self.encoding fails
        # or garbles the text for encodings such as utf-16.
        return tostring(self.xml).decode("ascii")

    def attr_type(self, name, scope, value):
        """Infer the attribute type of data named name. Currently this only
        supports inference of numeric types.

        If self.infer_numeric_types is false, the type of `value` is used.
        Otherwise, pick the most general of types found across all values
        with name and scope. This means edges with data named 'weight' are
        treated separately from nodes with data named 'weight'.
        """
        if not self.infer_numeric_types:
            return type(value)

        # Types are recorded under the stringified name (see add_attributes),
        # so look them up the same way; attribute names need not be strings.
        seen = self.attribute_types[(str(name), scope)]
        if not seen:
            # Nothing recorded for this attribute: fall back to the value.
            return type(value)
        if len(seen) == 1:
            return next(iter(seen))

        xml_names = {self.get_xml_type(t) for t in seen}
        if "string" in xml_names:
            return str
        if "float" in xml_names or "double" in xml_names:
            return float
        return int

    def get_key(self, name, attr_type, scope, default):
        """Return the ``<key>`` id for (name, attr_type, scope).

        A new ``<key>`` element is created and inserted at the front of
        ``self.xml`` the first time a combination is requested. `default`,
        when not None, is written as its ``<default>`` child.
        """
        keys_key = (name, attr_type, scope)
        try:
            return self.keys[keys_key]
        except KeyError:
            if self.named_key_ids:
                new_id = name
            else:
                new_id = f"d{len(self.keys)}"

            self.keys[keys_key] = new_id
            key_kwargs = {
                "id": new_id,
                "for": scope,
                "attr.name": name,
                "attr.type": attr_type,
            }
            key_element = self.myElement("key", **key_kwargs)
            # add subelement for data default value if present
            if default is not None:
                default_element = self.myElement("default")
                default_element.text = str(default)
                key_element.append(default_element)
            self.xml.insert(0, key_element)
        return new_id

    def add_data(self, name, element_type, value, scope="all", default=None):
        """
        Make a data element for an edge or a node. Keep a log of the
        type in the keys table.

        Raises
        ------
        NetworkXError
            If `element_type` is not a key of ``self.xml_type``.
        """
        if element_type not in self.xml_type:
            raise nx.NetworkXError(
                f"GraphML writer does not support {element_type} as data values."
            )
        keyid = self.get_key(name, self.get_xml_type(element_type), scope, default)
        data_element = self.myElement("data", key=keyid)
        data_element.text = str(value)
        return data_element

    def add_attributes(self, scope, xml_obj, data, default):
        """Appends attribute data to edges or nodes, and stores type information
        to be added later. See add_graph_element.
        """
        for k, v in data.items():
            self.attribute_types[(str(k), scope)].add(type(v))
            self.attributes[xml_obj].append([k, v, scope, default.get(k)])

    def add_nodes(self, G, graph_element):
        """Append one ``<node>`` element per node of `G` to `graph_element`."""
        default = G.graph.get("node_default", {})
        for node, data in G.nodes(data=True):
            node_element = self.myElement("node", id=str(node))
            self.add_attributes("node", node_element, data, default)
            graph_element.append(node_element)

    def add_edges(self, G, graph_element):
        """Append one ``<edge>`` element per edge of `G` to `graph_element`.

        The edge ``id`` is taken from the edge attribute named by
        ``self.edge_id_from_attribute`` when that is set and present in the
        edge data; otherwise multigraph edges use their key and simple-graph
        edges get no ``id``.
        """
        default = G.graph.get("edge_default", {})
        id_attr = self.edge_id_from_attribute
        multigraph = G.is_multigraph()
        if multigraph:
            edges = G.edges(data=True, keys=True)
        else:
            edges = ((u, v, None, data) for u, v, data in G.edges(data=True))

        for u, v, key, data in edges:
            attrib = {"source": str(u), "target": str(v)}
            if id_attr and id_attr in data:
                # select attribute to be edge id
                attrib["id"] = str(data[id_attr])
            elif multigraph:
                attrib["id"] = str(key)
            edge_element = self.myElement("edge", **attrib)
            # Attributes are attached before the element is appended because
            # subclasses may write the element out as soon as it is appended.
            self.add_attributes("edge", edge_element, data, default)
            graph_element.append(edge_element)

    def add_graph_element(self, G):
        """
        Serialize graph G in GraphML to the stream.

        ``G.graph`` is only read: the "id" entry becomes the ``id`` of the
        ``<graph>`` element and is not removed from the caller's graph.
        """
        default_edge_type = "directed" if G.is_directed() else "undirected"

        graph_attrib = {"edgedefault": default_edge_type}
        graphid = G.graph.get("id")
        if graphid is not None:
            graph_attrib["id"] = str(graphid)
        graph_element = self.myElement("graph", **graph_attrib)

        # "id" is emitted as an XML attribute above, and the two *_default
        # dicts become <default> children of <key>; none are graph <data>.
        reserved = ("node_default", "edge_default", "id")
        data = {k: v for k, v in G.graph.items() if k not in reserved}
        self.add_attributes("graph", graph_element, data, {})
        self.add_nodes(G, graph_element)
        self.add_edges(G, graph_element)

        # self.attributes contains a mapping from XML Objects to a list of
        # data that needs to be added to them.
        # We postpone processing in order to do type inference/generalization.
        # See self.attr_type
        for xml_obj, pending in self.attributes.items():
            for k, v, scope, default in pending:
                xml_obj.append(
                    self.add_data(
                        str(k), self.attr_type(k, scope, v), str(v), scope, default
                    )
                )
        # Forget what was just written so that adding another graph does not
        # append the same <data> elements to this graph's elements again.
        self.attributes.clear()
        self.xml.append(graph_element)

    def add_graphs(self, graph_list):
        """Add many graphs to this GraphML document."""
        for G in graph_list:
            self.add_graph_element(G)

    def dump(self, stream):
        """Write the document, with an XML declaration, to `stream`."""
        from xml.etree.ElementTree import ElementTree

        if self.prettyprint:
            self.indent(self.xml)
        document = ElementTree(self.xml)
        document.write(stream, encoding=self.encoding, xml_declaration=True)

    def indent(self, elem, level=0):
        """In-place prettyprint formatter: set text/tail whitespace of `elem`
        and, recursively, of its descendants according to nesting `level`."""
        pad = "\n" + level * " "
        if len(elem):
            if not elem.text or not elem.text.strip():
                elem.text = pad + " "
            if not elem.tail or not elem.tail.strip():
                elem.tail = pad
            child = None
            for child in elem:
                self.indent(child, level + 1)
            # The last child's tail precedes the parent's closing tag, so it
            # gets the parent's indentation rather than the child's.
            if not child.tail or not child.tail.strip():
                child.tail = pad
        else:
            if level and (not elem.tail or not elem.tail.strip()):
                elem.tail = pad
685
686
class IncrementalElement:
    """Wrapper for _IncrementalWriter providing an Element like interface.

    This wrapper does not intend to be a complete implementation but rather to
    deal with those calls used in GraphMLWriter.
    """

    def __init__(self, xml, prettyprint):
        """Store the writer object `xml` and the pretty-print flag."""
        self.prettyprint = prettyprint
        self.xml = xml

    def append(self, element):
        """Write `element` through the wrapped writer instead of storing it."""
        writer = self.xml
        writer.write(element, pretty_print=self.prettyprint)
700
701
class GraphMLWriterLxml(GraphMLWriter):
    """GraphML writer that streams the document to `path` with lxml.

    ``<key>`` elements are collected first, then nodes and edges are written
    one at a time. Call `dump` to close the document.
    """

    def __init__(
        self,
        path,
        graph=None,
        encoding="utf-8",
        prettyprint=True,
        infer_numeric_types=False,
        named_key_ids=False,
        edge_id_from_attribute=None,
    ):
        """Open the incremental writer on `path` and start ``<graphml>``.

        The parameters have the same meaning as for `GraphMLWriter`; `path`
        is handed to ``lxml.etree.xmlfile``. If `graph` is given it is
        written immediately via `add_graph_element`.
        """
        self.construct_types()
        import lxml.etree as lxmletree

        self.myElement = lxmletree.Element

        self._encoding = encoding
        self._prettyprint = prettyprint
        self.named_key_ids = named_key_ids
        self.edge_id_from_attribute = edge_id_from_attribute
        self.infer_numeric_types = infer_numeric_types

        # Both context managers are entered by hand here and left in dump().
        self._xml_base = lxmletree.xmlfile(path, encoding=encoding)
        self._xml = self._xml_base.__enter__()
        self._xml.write_declaration()

        # We need to have a xml variable that support insertion. This call is
        # used for adding the keys to the document.
        # We will store those keys in a plain list, and then write them to
        # the main graphml element before the graph element is opened.
        self.xml = []
        self._keys = self.xml
        self._graphml = self._xml.element(
            "graphml",
            {
                "xmlns": self.NS_GRAPHML,
                "xmlns:xsi": self.NS_XSI,
                "xsi:schemaLocation": self.SCHEMALOCATION,
            },
        )
        self._graphml.__enter__()
        self.keys = {}
        self.attribute_types = defaultdict(set)

        if graph is not None:
            self.add_graph_element(graph)

    def _declare_keys(self, scope, records, defaults):
        """Record attribute types, then create the ``<key>`` elements.

        `records` is a re-iterable of tuples whose last item is an attribute
        dict. All types are gathered in a first pass so that `attr_type` can
        pick the most general type before any key is created.
        """
        for *_, attrs in records:
            for k, v in attrs.items():
                self.attribute_types[(str(k), scope)].add(type(v))
        for *_, attrs in records:
            for k, v in attrs.items():
                # Look types up under str(k), the name they were stored under.
                xml_type = self.get_xml_type(self.attr_type(str(k), scope, v))
                self.get_key(str(k), xml_type, scope, defaults.get(k))

    def add_graph_element(self, G):
        """
        Serialize graph G in GraphML to the stream.

        ``G.graph`` is only read: the "id" entry becomes the ``id`` of the
        ``<graph>`` element and is not removed from the caller's graph.
        """
        default_edge_type = "directed" if G.is_directed() else "undirected"

        graph_attrib = {"edgedefault": default_edge_type}
        graphid = G.graph.get("id")
        if graphid is not None:
            graph_attrib["id"] = str(graphid)
        graph_element = self._xml.element("graph", **graph_attrib)

        # gather attributes types for the whole graph
        # to find the most general numeric format needed.
        # Then pass through attributes to create key_id for each.
        reserved = ("node_default", "edge_default", "id")
        graphdata = {k: v for k, v in G.graph.items() if k not in reserved}
        node_default = G.graph.get("node_default", {})
        edge_default = G.graph.get("edge_default", {})

        self._declare_keys("graph", [(graphdata,)], {})
        self._declare_keys("node", G.nodes(data=True), node_default)
        # edges(data=True) yields (u, v, data) for multigraphs as well; edge
        # keys are not needed to declare attribute keys.
        self._declare_keys("edge", G.edges(data=True), edge_default)

        # Now add attribute keys to the xml file
        for key in self.xml:
            self._xml.write(key, pretty_print=self._prettyprint)
        # The keys are written; drop them so that a later graph does not
        # emit the same <key> elements a second time.
        del self.xml[:]

        # The incremental_writer writes each node/edge as it is created
        incremental_writer = IncrementalElement(self._xml, self._prettyprint)
        with graph_element:
            self.add_attributes("graph", incremental_writer, graphdata, {})
            self.add_nodes(G, incremental_writer)  # adds attributes too
            self.add_edges(G, incremental_writer)  # adds attributes too

    def add_attributes(self, scope, xml_obj, data, default):
        """Appends attribute data."""
        for k, v in data.items():
            data_element = self.add_data(
                str(k), self.attr_type(str(k), scope, v), str(v), scope, default.get(k)
            )
            xml_obj.append(data_element)

    def __str__(self):
        """Return the default object representation.

        The inherited `__str__` serializes ``self.xml``, which is a plain
        list in this class, so it is bypassed.
        """
        return object.__str__(self)

    def dump(self, stream=None):
        """Close ``<graphml>`` and the underlying xmlfile; `stream` is unused."""
        self._graphml.__exit__(None, None, None)
        self._xml_base.__exit__(None, None, None)
833
834
# Default writer: the lxml-based implementation, which itself falls back to
# write_graphml_xml when lxml cannot be imported.
write_graphml = write_graphml_lxml
837
838
class GraphMLReader(GraphML):
    """Read a GraphML document. Produces NetworkX graph objects."""

    def __init__(self, node_type=str, edge_key_type=int, force_multigraph=False):
        """Store the conversion callables and the multigraph flag.

        Parameters
        ----------
        node_type : callable
            Applied to node ids and to edge ``source``/``target`` values.
        edge_key_type : callable
            Applied to edge ids; a ValueError leaves the id unconverted.
        force_multigraph : bool
            If True, multigraphs are always returned.
        """
        self.construct_types()
        self.node_type = node_type
        self.edge_key_type = edge_key_type
        self.multigraph = force_multigraph  # If False, test for multiedges
        self.edge_ids = {}  # dict mapping (u,v) tuples to edge id attributes

    def __call__(self, path=None, string=None):
        """Parse `path` (file or filename) or `string`; yield one graph per
        top-level ``<graph>`` element.

        Raises
        ------
        ValueError
            If neither `path` nor `string` is given.
        """
        from xml.etree.ElementTree import ElementTree, fromstring

        if path is not None:
            self.xml = ElementTree(file=path)
        elif string is not None:
            self.xml = fromstring(string)
        else:
            raise ValueError("Must specify either 'path' or 'string' as kwarg")
        (keys, defaults) = self.find_graphml_keys(self.xml)
        for g in self.xml.findall(f"{{{self.NS_GRAPHML}}}graph"):
            yield self.make_graph(g, keys, defaults)

    def make_graph(self, graph_xml, graphml_keys, defaults, G=None):
        """Build a graph from a ``<graph>`` element.

        When `G` is given (nested yEd group graphs), nodes and edges are
        added to it instead of to a new graph.

        Raises
        ------
        NetworkXError
            If the graph contains a ``<hyperedge>`` element.
        """
        # set default graph type
        edgedefault = graph_xml.get("edgedefault", None)
        if G is None:
            if edgedefault == "directed":
                G = nx.MultiDiGraph()
            else:
                G = nx.MultiGraph()
        # set defaults for graph attributes
        G.graph["node_default"] = {}
        G.graph["edge_default"] = {}
        for key_id, value in defaults.items():
            key_for = graphml_keys[key_id]["for"]
            name = graphml_keys[key_id]["name"]
            python_type = graphml_keys[key_id]["type"]
            if key_for == "node":
                G.graph["node_default"].update({name: python_type(value)})
            if key_for == "edge":
                G.graph["edge_default"].update({name: python_type(value)})
        # hyperedges are not supported
        hyperedge = graph_xml.find(f"{{{self.NS_GRAPHML}}}hyperedge")
        if hyperedge is not None:
            raise nx.NetworkXError("GraphML reader doesn't support hyperedges")
        # add nodes
        for node_xml in graph_xml.findall(f"{{{self.NS_GRAPHML}}}node"):
            self.add_node(G, node_xml, graphml_keys, defaults)
        # add edges
        for edge_xml in graph_xml.findall(f"{{{self.NS_GRAPHML}}}edge"):
            self.add_edge(G, edge_xml, graphml_keys)
        # add graph data
        data = self.decode_data_elements(graphml_keys, graph_xml)
        G.graph.update(data)

        # switch to Graph or DiGraph if no parallel edges were found
        if self.multigraph:
            return G

        G = nx.DiGraph(G) if G.is_directed() else nx.Graph(G)
        # add explicit edge "id" from file as attribute in NX graph.
        nx.set_edge_attributes(G, values=self.edge_ids, name="id")
        return G

    def add_node(self, G, node_xml, graphml_keys, defaults):
        """Add a node to the graph."""
        # warn on finding unsupported ports tag
        ports = node_xml.find(f"{{{self.NS_GRAPHML}}}port")
        if ports is not None:
            warnings.warn("GraphML port tag not supported.")
        # find the node by id and cast it to the appropriate type
        node_id = self.node_type(node_xml.get("id"))
        # get data/attributes for node
        data = self.decode_data_elements(graphml_keys, node_xml)
        G.add_node(node_id, **data)
        # get child nodes
        if node_xml.attrib.get("yfiles.foldertype") == "group":
            graph_xml = node_xml.find(f"{{{self.NS_GRAPHML}}}graph")
            # A group node may carry no nested <graph>; then there is
            # nothing to merge into G.
            if graph_xml is not None:
                self.make_graph(graph_xml, graphml_keys, defaults, G)

    def add_edge(self, G, edge_element, graphml_keys):
        """Add an edge to the graph.

        Raises
        ------
        NetworkXError
            If the edge's ``directed`` attribute contradicts the graph type.
        """
        # warn on finding unsupported ports tag
        ports = edge_element.find(f"{{{self.NS_GRAPHML}}}port")
        if ports is not None:
            warnings.warn("GraphML port tag not supported.")

        # raise error if we find mixed directed and undirected edges
        directed = edge_element.get("directed")
        if G.is_directed() and directed == "false":
            msg = "directed=false edge found in directed graph."
            raise nx.NetworkXError(msg)
        if (not G.is_directed()) and directed == "true":
            msg = "directed=true edge found in undirected graph."
            raise nx.NetworkXError(msg)

        source = self.node_type(edge_element.get("source"))
        target = self.node_type(edge_element.get("target"))
        data = self.decode_data_elements(graphml_keys, edge_element)
        # GraphML stores edge ids as an attribute
        # NetworkX uses them as keys in multigraphs too if no key
        # attribute is specified
        edge_id = edge_element.get("id")
        if edge_id:
            # self.edge_ids is used by `make_graph` method for non-multigraphs
            self.edge_ids[source, target] = edge_id
            try:
                edge_id = self.edge_key_type(edge_id)
            except ValueError:  # Could not convert.
                pass
        else:
            edge_id = data.get("key")

        if G.has_edge(source, target):
            # mark this as a multigraph
            self.multigraph = True

        # Use add_edges_from to avoid error with add_edge when `'key' in data`
        # Note there is only one edge here...
        G.add_edges_from([(source, target, edge_id, data)])

    def decode_data_elements(self, graphml_keys, obj_xml):
        """Use the key information to decode the data XML if present.

        Returns a dict of attribute name -> decoded value for the ``<data>``
        children of `obj_xml`.

        Raises
        ------
        NetworkXError
            If a ``<data>`` element refers to a key id not in `graphml_keys`.
        """
        data = {}
        for data_element in obj_xml.findall(f"{{{self.NS_GRAPHML}}}data"):
            key = data_element.get("key")
            try:
                data_name = graphml_keys[key]["name"]
                data_type = graphml_keys[key]["type"]
            except KeyError as err:
                raise nx.NetworkXError(f"Bad GraphML data: no key {key}") from err
            text = data_element.text
            has_children = len(data_element) > 0
            # assume anything with subelements is a yfiles extension
            if text is not None and not has_children:
                if data_type is bool:
                    # Ignore cases.
                    # http://docs.oracle.com/javase/6/docs/api/java/lang/
                    # Boolean.html#parseBoolean%28java.lang.String%29
                    data[data_name] = self.convert_bool[text.lower()]
                else:
                    data[data_name] = data_type(text)
            elif has_children:
                # Assume yfiles as subelements, try to extract node_label
                node_label = None
                # set GenericNode's configuration as shape type
                gn = data_element.find(f"{{{self.NS_Y}}}GenericNode")
                if gn is not None:
                    data["shape_type"] = gn.get("configuration")
                for shape_kind in ["GenericNode", "ShapeNode", "SVGNode", "ImageNode"]:
                    pref = f"{{{self.NS_Y}}}{shape_kind}/{{{self.NS_Y}}}"
                    geometry = data_element.find(f"{pref}Geometry")
                    if geometry is not None:
                        data["x"] = geometry.get("x")
                        data["y"] = geometry.get("y")
                    if node_label is None:
                        node_label = data_element.find(f"{pref}NodeLabel")
                    shape = data_element.find(f"{pref}Shape")
                    if shape is not None:
                        data["shape_type"] = shape.get("type")
                if node_label is not None:
                    data["label"] = node_label.text

                # check all the different types of edges available in yEd.
                for edge_type in [
                    "PolyLineEdge",
                    "SplineEdge",
                    "QuadCurveEdge",
                    "BezierEdge",
                    "ArcEdge",
                ]:
                    pref = f"{{{self.NS_Y}}}{edge_type}/{{{self.NS_Y}}}"
                    edge_label = data_element.find(f"{pref}EdgeLabel")
                    if edge_label is not None:
                        break
                if edge_label is not None:
                    data["label"] = edge_label.text
            elif text is None:
                data[data_name] = ""
        return data

    def find_graphml_keys(self, graph_element):
        """Extracts all the keys and key defaults from the xml.

        Returns
        -------
        graphml_keys : dict
            key id -> {"name": ..., "type": Python type, "for": scope}
        graphml_key_defaults : dict
            key id -> default value, for keys with a ``<default>`` child

        Raises
        ------
        NetworkXError
            If a key has neither ``attr.name`` nor ``yfiles.type``.
        """
        graphml_keys = {}
        graphml_key_defaults = {}
        for k in graph_element.findall(f"{{{self.NS_GRAPHML}}}key"):
            attr_id = k.get("id")
            attr_type = k.get("attr.type")
            attr_name = k.get("attr.name")
            yfiles_type = k.get("yfiles.type")
            if yfiles_type is not None:
                attr_name = yfiles_type
                attr_type = "yfiles"
            if attr_type is None:
                attr_type = "string"
                warnings.warn(f"No key type for id {attr_id}. Using string")
            if attr_name is None:
                raise nx.NetworkXError(f"Unknown key for id {attr_id}.")
            graphml_keys[attr_id] = {
                "name": attr_name,
                "type": self.python_type[attr_type],
                "for": k.get("for"),
            }
            # check for "default" sub-element of key element
            default = k.find(f"{{{self.NS_GRAPHML}}}default")
            if default is not None:
                # Handle default values identically to data element values
                python_type = graphml_keys[attr_id]["type"]
                default_text = default.text
                if python_type is bool:
                    graphml_key_defaults[attr_id] = self.convert_bool[
                        default_text.lower()
                    ]
                elif default_text is None and python_type is str:
                    # An empty <default/> is the empty string, as for empty
                    # <data> elements; str(None) would give "None".
                    graphml_key_defaults[attr_id] = ""
                else:
                    graphml_key_defaults[attr_id] = python_type(default_text)
        return graphml_keys, graphml_key_defaults