Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/networkx/readwrite/graphml.py: 46%
416 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-20 07:00 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-20 07:00 +0000
1"""
2*******
3GraphML
4*******
5Read and write graphs in GraphML format.
7.. warning::
9 This parser uses the standard xml library present in Python, which is
10 insecure - see :external+python:mod:`xml` for additional information.
11 Only parse GraphML files you trust.
13This implementation does not support mixed graphs (directed and undirected
14edges together), hyperedges, nested graphs, or ports.
16"GraphML is a comprehensive and easy-to-use file format for graphs. It
17consists of a language core to describe the structural properties of a
18graph and a flexible extension mechanism to add application-specific
19data. Its main features include support of
21 * directed, undirected, and mixed graphs,
22 * hypergraphs,
23 * hierarchical graphs,
24 * graphical representations,
25 * references to external data,
26 * application-specific attribute data, and
27 * light-weight parsers.
29Unlike many other file formats for graphs, GraphML does not use a
30custom syntax. Instead, it is based on XML and hence ideally suited as
31a common denominator for all kinds of services generating, archiving,
32or processing graphs."
34http://graphml.graphdrawing.org/
36Format
37------
38GraphML is an XML format. See
39http://graphml.graphdrawing.org/specification.html for the specification and
40http://graphml.graphdrawing.org/primer/graphml-primer.html
41for examples.
42"""
43import warnings
44from collections import defaultdict
46import networkx as nx
47from networkx.utils import open_file
49__all__ = [
50 "write_graphml",
51 "read_graphml",
52 "generate_graphml",
53 "write_graphml_xml",
54 "write_graphml_lxml",
55 "parse_graphml",
56 "GraphMLWriter",
57 "GraphMLReader",
58]
@open_file(1, mode="wb")
def write_graphml_xml(
    G,
    path,
    encoding="utf-8",
    prettyprint=True,
    infer_numeric_types=False,
    named_key_ids=False,
    edge_id_from_attribute=None,
):
    """Write G in GraphML XML format to path using the standard xml library.

    Parameters
    ----------
    G : graph
        A networkx graph
    path : file or string
        File or filename to write.
        Filenames ending in .gz or .bz2 will be compressed.
    encoding : string (optional)
        Encoding for text data.
    prettyprint : bool (optional)
        If True use line breaks and indenting in output XML.
    infer_numeric_types : boolean
        Determine if numeric types should be generalized.
        For example, if edges have both int and float 'weight' attributes,
        we infer in GraphML that both are floats.
    named_key_ids : bool (optional)
        If True use attr.name as value for key elements' id attribute.
    edge_id_from_attribute : dict key (optional)
        If provided, the graphml edge id is set by looking up the
        corresponding edge data attribute keyed by this parameter. If `None`
        or the key does not exist in edge data, the edge id is set by the
        edge key if `G` is a MultiGraph, else the edge id is left unset.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> nx.write_graphml(G, "test.graphml")

    Notes
    -----
    This implementation does not support mixed graphs (directed
    and undirected edges together), hyperedges, nested graphs, or ports.
    """
    # Collect the writer configuration once, then build -> fill -> dump.
    writer_options = {
        "encoding": encoding,
        "prettyprint": prettyprint,
        "infer_numeric_types": infer_numeric_types,
        "named_key_ids": named_key_ids,
        "edge_id_from_attribute": edge_id_from_attribute,
    }
    graphml_writer = GraphMLWriter(**writer_options)
    graphml_writer.add_graph_element(G)
    graphml_writer.dump(path)
@open_file(1, mode="wb")
def write_graphml_lxml(
    G,
    path,
    encoding="utf-8",
    prettyprint=True,
    infer_numeric_types=False,
    named_key_ids=False,
    edge_id_from_attribute=None,
):
    """Write G in GraphML XML format to path, preferring the lxml backend.

    This function uses the LXML framework and should be faster than
    the version using the xml library. When lxml cannot be imported the
    call is delegated to `write_graphml_xml` with the same arguments.

    Parameters
    ----------
    G : graph
        A networkx graph
    path : file or string
        File or filename to write.
        Filenames ending in .gz or .bz2 will be compressed.
    encoding : string (optional)
        Encoding for text data.
    prettyprint : bool (optional)
        If True use line breaks and indenting in output XML.
    infer_numeric_types : boolean
        Determine if numeric types should be generalized.
        For example, if edges have both int and float 'weight' attributes,
        we infer in GraphML that both are floats.
    named_key_ids : bool (optional)
        If True use attr.name as value for key elements' id attribute.
    edge_id_from_attribute : dict key (optional)
        If provided, the graphml edge id is set by looking up the
        corresponding edge data attribute keyed by this parameter. If `None`
        or the key does not exist in edge data, the edge id is set by the
        edge key if `G` is a MultiGraph, else the edge id is left unset.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> nx.write_graphml_lxml(G, "fourpath.graphml")

    Notes
    -----
    This implementation does not support mixed graphs (directed
    and undirected edges together), hyperedges, nested graphs, or ports.
    """
    try:
        # Imported only to find out whether lxml is available.
        import lxml.etree  # noqa: F401
    except ImportError:
        # No lxml: hand the work to the ElementTree-based writer.
        return write_graphml_xml(
            G,
            path,
            encoding=encoding,
            prettyprint=prettyprint,
            infer_numeric_types=infer_numeric_types,
            named_key_ids=named_key_ids,
            edge_id_from_attribute=edge_id_from_attribute,
        )

    lxml_writer = GraphMLWriterLxml(
        path,
        graph=G,
        encoding=encoding,
        prettyprint=prettyprint,
        infer_numeric_types=infer_numeric_types,
        named_key_ids=named_key_ids,
        edge_id_from_attribute=edge_id_from_attribute,
    )
    lxml_writer.dump()
def generate_graphml(
    G,
    encoding="utf-8",
    prettyprint=True,
    named_key_ids=False,
    edge_id_from_attribute=None,
):
    """Generate GraphML lines for G

    Parameters
    ----------
    G : graph
        A networkx graph
    encoding : string (optional)
        Encoding for text data.
    prettyprint : bool (optional)
        If True use line breaks and indenting in output XML.
    named_key_ids : bool (optional)
        If True use attr.name as value for key elements' id attribute.
    edge_id_from_attribute : dict key (optional)
        If provided, the graphml edge id is set by looking up the
        corresponding edge data attribute keyed by this parameter. If `None`
        or the key does not exist in edge data, the edge id is set by the
        edge key if `G` is a MultiGraph, else the edge id is left unset.

    Yields
    ------
    line : string
        One line of the serialized GraphML document, without line ending.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> linefeed = chr(10)  # linefeed = \n
    >>> s = linefeed.join(nx.generate_graphml(G))
    >>> for line in nx.generate_graphml(G):  # doctest: +SKIP
    ...     print(line)

    Notes
    -----
    This implementation does not support mixed graphs (directed and
    undirected edges together), hyperedges, nested graphs, or ports.
    """
    graphml_writer = GraphMLWriter(
        encoding=encoding,
        prettyprint=prettyprint,
        named_key_ids=named_key_ids,
        edge_id_from_attribute=edge_id_from_attribute,
    )
    graphml_writer.add_graph_element(G)
    # Render the whole document once, then hand it out line by line.
    document_text = str(graphml_writer)
    for line in document_text.splitlines():
        yield line
@open_file(0, mode="rb")
@nx._dispatch(graphs=None)
def read_graphml(path, node_type=str, edge_key_type=int, force_multigraph=False):
    """Read graph in GraphML format from path.

    Parameters
    ----------
    path : file or string
        File or filename to read.
        Filenames ending in .gz or .bz2 will be uncompressed.

    node_type: Python type (default: str)
        Convert node ids to this type

    edge_key_type: Python type (default: int)
        Convert graphml edge ids to this type. Multigraphs use id as edge key.
        Non-multigraphs add to edge attribute dict with name "id".

    force_multigraph : bool (default: False)
        If True, return a multigraph with edge keys. If False (the default)
        return a multigraph when multiedges are in the graph.

    Returns
    -------
    graph: NetworkX graph
        If parallel edges are present or `force_multigraph=True` then
        a MultiGraph or MultiDiGraph is returned. Otherwise a Graph/DiGraph.
        The returned graph is directed if the file indicates it should be.
        Only the first graph found in the document is returned.

    Raises
    ------
    NetworkXError
        If no graph can be read, even after retrying with the GraphML
        namespace added to a bare ``<graphml>`` root tag.

    Notes
    -----
    Default node and edge attributes are not propagated to each node and edge.
    They can be obtained from `G.graph` and applied to node and edge attributes
    if desired using something like this:

    >>> default_color = G.graph["node_default"]["color"]  # doctest: +SKIP
    >>> for node, data in G.nodes(data=True):  # doctest: +SKIP
    ...     if "color" not in data:
    ...         data["color"] = default_color
    >>> default_color = G.graph["edge_default"]["color"]  # doctest: +SKIP
    >>> for u, v, data in G.edges(data=True):  # doctest: +SKIP
    ...     if "color" not in data:
    ...         data["color"] = default_color

    This implementation does not support mixed graphs (directed and
    undirected edges together), hypergraphs, nested graphs, or ports.

    For multigraphs the GraphML edge "id" will be used as the edge
    key. If not specified then the "key" attribute will be used. If
    there is no "key" attribute a default NetworkX multigraph edge key
    will be provided.

    Files with the yEd "yfiles" extension can be read. The type of the node's
    shape is preserved in the `shape_type` node attribute.

    yEd compressed files ("file.graphmlz" extension) can be read by renaming
    the file to "file.graphml.gz".

    """
    reader = GraphMLReader(node_type, edge_key_type, force_multigraph)
    # The reader yields every graph in the document; only the first is used.
    graphs = list(reader(path=path))
    if not graphs:
        # Nothing matched the namespaced lookup: the root tag may be missing
        # its xmlns declaration. Rewind, patch the header and parse again.
        path.seek(0)
        raw_document = path.read()
        namespaced_root = b'<graphml xmlns="http://graphml.graphdrawing.org/xmlns">'
        patched_document = raw_document.replace(b"<graphml>", namespaced_root)
        graphs = list(reader(string=patched_document))
        if not graphs:
            raise nx.NetworkXError("file not successfully read as graphml")
    return graphs[0]
@nx._dispatch(graphs=None)
def parse_graphml(
    graphml_string, node_type=str, edge_key_type=int, force_multigraph=False
):
    """Read graph in GraphML format from string.

    Parameters
    ----------
    graphml_string : string
        String containing graphml information
        (e.g., contents of a graphml file).

    node_type: Python type (default: str)
        Convert node ids to this type

    edge_key_type: Python type (default: int)
        Convert graphml edge ids to this type. Multigraphs use id as edge key.
        Non-multigraphs add to edge attribute dict with name "id".

    force_multigraph : bool (default: False)
        If True, return a multigraph with edge keys. If False (the default)
        return a multigraph when multiedges are in the graph.

    Returns
    -------
    graph: NetworkX graph
        If no parallel edges are found a Graph or DiGraph is returned.
        Otherwise a MultiGraph or MultiDiGraph is returned.
        Only the first graph found in the document is returned.

    Raises
    ------
    NetworkXError
        If no graph can be read, even after retrying with the GraphML
        namespace added to a bare ``<graphml>`` root tag.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> linefeed = chr(10)  # linefeed = \n
    >>> s = linefeed.join(nx.generate_graphml(G))
    >>> H = nx.parse_graphml(s)

    Notes
    -----
    Default node and edge attributes are not propagated to each node and edge.
    They can be obtained from `G.graph` and applied to node and edge attributes
    if desired using something like this:

    >>> default_color = G.graph["node_default"]["color"]  # doctest: +SKIP
    >>> for node, data in G.nodes(data=True):  # doctest: +SKIP
    ...     if "color" not in data:
    ...         data["color"] = default_color
    >>> default_color = G.graph["edge_default"]["color"]  # doctest: +SKIP
    >>> for u, v, data in G.edges(data=True):  # doctest: +SKIP
    ...     if "color" not in data:
    ...         data["color"] = default_color

    This implementation does not support mixed graphs (directed and
    undirected edges together), hypergraphs, nested graphs, or ports.

    For multigraphs the GraphML edge "id" will be used as the edge
    key. If not specified then the "key" attribute will be used. If
    there is no "key" attribute a default NetworkX multigraph edge key
    will be provided.

    """
    reader = GraphMLReader(node_type, edge_key_type, force_multigraph)
    # The reader yields every graph in the document; only the first is used.
    graphs = list(reader(string=graphml_string))
    if not graphs:
        # Retry with the namespace declaration added to a bare root tag.
        namespaced_root = '<graphml xmlns="http://graphml.graphdrawing.org/xmlns">'
        patched_string = graphml_string.replace("<graphml>", namespaced_root)
        graphs = list(reader(string=patched_string))
        if not graphs:
            raise nx.NetworkXError("file not successfully read as graphml")
    return graphs[0]
class GraphML:
    """Constants and type tables shared by the GraphML reader and writers."""

    NS_GRAPHML = "http://graphml.graphdrawing.org/xmlns"
    NS_XSI = "http://www.w3.org/2001/XMLSchema-instance"
    # xmlns:y="http://www.yworks.com/xml/graphml"
    NS_Y = "http://www.yworks.com/xml/graphml"
    SCHEMALOCATION = " ".join(
        [
            "http://graphml.graphdrawing.org/xmlns",
            "http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd",
        ]
    )

    def construct_types(self):
        """Build the Python-type <-> GraphML-type lookup tables.

        Sets two instance attributes:

        ``xml_type``
            Maps a Python (or numpy) type to a GraphML ``attr.type`` name.
        ``python_type``
            Maps a GraphML ``attr.type`` name to the Python type used on read.

        Both tables are built from one ordered list of ``(type, name)`` pairs;
        when a key occurs more than once the last pair wins.
        """
        types = [
            (int, "integer"),  # for Gephi GraphML bug
            (str, "yfiles"),
            (str, "string"),
            (int, "int"),
            (int, "long"),
            (float, "float"),
            (float, "double"),
            (bool, "boolean"),
        ]

        # These additions to types allow writing numpy types
        try:
            import numpy as np
        except ImportError:
            # numpy is optional; without it only builtin types are supported.
            pass
        else:
            # prepend so that python types are created upon read (last entry wins)
            types = [
                (np.float64, "float"),
                (np.float32, "float"),
                (np.float16, "float"),
                (np.int_, "int"),
                (np.int8, "int"),
                (np.int16, "int"),
                (np.int32, "int"),
                (np.int64, "int"),
                (np.uint8, "int"),
                (np.uint16, "int"),
                (np.uint32, "int"),
                (np.uint64, "int"),
                (np.intc, "int"),
                (np.intp, "int"),
            ] + types

        self.xml_type = dict(types)
        self.python_type = {xml_name: py_type for py_type, xml_name in types}

    # This page says that data types in GraphML follow Java(TM).
    #  http://graphml.graphdrawing.org/primer/graphml-primer.html#AttributesDefinition
    # true and false are the only boolean literals:
    #  http://en.wikibooks.org/wiki/Java_Programming/Literals#Boolean_Literals
    convert_bool = {
        # We use data.lower() in actual use.
        "true": True,
        "false": False,
        # Include integer strings for convenience.
        "0": False,
        0: False,
        "1": True,
        1: True,
    }

    def get_xml_type(self, key):
        """Return the GraphML ``attr.type`` name for the Python type `key`.

        Wrapper around the ``xml_type`` dict that raises a more informative
        exception message when a user attempts to use data of a type not
        supported by GraphML.

        Parameters
        ----------
        key : type
            The Python (or numpy) type of an attribute value.

        Raises
        ------
        TypeError
            If `key` has no GraphML equivalent in ``xml_type``.
        """
        try:
            return self.xml_type[key]
        except KeyError as err:
            # `key` is already a type: report it directly. Formatting
            # `type(key)` would always print "<class 'type'>".
            raise TypeError(
                f"GraphML does not support type {key} as data values."
            ) from err
class GraphMLWriter(GraphML):
    """Build a GraphML document in memory with ``xml.etree.ElementTree``.

    Graphs are added with `add_graph_element` or `add_graphs`. The document
    is rendered with ``str(writer)`` or written to a stream with `dump`.

    Parameters
    ----------
    graph : NetworkX graph, optional
        If given, it is added to the document immediately.
    encoding : string (default: "utf-8")
        Encoding passed to the serializer in `dump` and used to decode the
        serialized bytes in ``__str__``.
    prettyprint : bool (default: True)
        If True the tree is indented in place before serialization.
    infer_numeric_types : bool (default: False)
        If True, use the most general type seen for each (name, scope) pair.
    named_key_ids : bool (default: False)
        If True use the attribute name as the ``id`` of each key element.
    edge_id_from_attribute : dict key, optional
        Edge data attribute whose value is used as the edge ``id``.
    """

    def __init__(
        self,
        graph=None,
        encoding="utf-8",
        prettyprint=True,
        infer_numeric_types=False,
        named_key_ids=False,
        edge_id_from_attribute=None,
    ):
        self.construct_types()
        from xml.etree.ElementTree import Element

        self.myElement = Element

        self.infer_numeric_types = infer_numeric_types
        self.prettyprint = prettyprint
        self.named_key_ids = named_key_ids
        self.edge_id_from_attribute = edge_id_from_attribute
        self.encoding = encoding
        self.xml = self.myElement(
            "graphml",
            {
                "xmlns": self.NS_GRAPHML,
                "xmlns:xsi": self.NS_XSI,
                "xsi:schemaLocation": self.SCHEMALOCATION,
            },
        )
        # (name, xml type, scope) -> id of the <key> element already created
        self.keys = {}
        # xml element -> pending [name, value, scope, default] records
        self.attributes = defaultdict(list)
        # (str(name), scope) -> set of Python types seen for that attribute
        self.attribute_types = defaultdict(set)

        if graph is not None:
            self.add_graph_element(graph)

    def __str__(self):
        """Return the document serialized as a string."""
        from xml.etree.ElementTree import tostring

        if self.prettyprint:
            self.indent(self.xml)
        s = tostring(self.xml).decode(self.encoding)
        return s

    def attr_type(self, name, scope, value):
        """Infer the attribute type of data named name. Currently this only
        supports inference of numeric types.

        If self.infer_numeric_types is false, the type of `value` is used.
        Otherwise, pick the most general of types found across all values
        with name and scope. This means edges with data named 'weight' are
        treated separately from nodes with data named 'weight'.
        """
        if not self.infer_numeric_types:
            return type(value)

        # Types are recorded under the stringified name (see add_attributes),
        # so the lookup must use it too. A raw non-string name would hit an
        # empty set and fail with IndexError below.
        types = self.attribute_types[(str(name), scope)]
        if len(types) > 1:
            types = {self.get_xml_type(t) for t in types}
            if "string" in types:
                return str
            if "float" in types or "double" in types:
                return float
            return int
        return list(types)[0]

    def get_key(self, name, attr_type, scope, default):
        """Return the id of the ``<key>`` element for this attribute.

        A key is identified by ``(name, attr_type, scope)``. If none exists
        yet, a new ``<key>`` element is created, inserted at the front of the
        document and remembered in ``self.keys``. If `default` is not None it
        is stored in a ``<default>`` child.
        """
        keys_key = (name, attr_type, scope)
        try:
            return self.keys[keys_key]
        except KeyError:
            if self.named_key_ids:
                new_id = name
            else:
                new_id = f"d{len(self.keys)}"

            self.keys[keys_key] = new_id
            key_kwargs = {
                "id": new_id,
                "for": scope,
                "attr.name": name,
                "attr.type": attr_type,
            }
            key_element = self.myElement("key", **key_kwargs)
            # add subelement for data default value if present
            if default is not None:
                default_element = self.myElement("default")
                default_element.text = str(default)
                key_element.append(default_element)
            self.xml.insert(0, key_element)
        return new_id

    def add_data(self, name, element_type, value, scope="all", default=None):
        """
        Make a data element for an edge or a node. Keep a log of the
        type in the keys table.

        Raises
        ------
        NetworkXError
            If `element_type` has no entry in ``self.xml_type``.
        """
        if element_type not in self.xml_type:
            raise nx.NetworkXError(
                f"GraphML writer does not support {element_type} as data values."
            )
        keyid = self.get_key(name, self.get_xml_type(element_type), scope, default)
        data_element = self.myElement("data", key=keyid)
        data_element.text = str(value)
        return data_element

    def add_attributes(self, scope, xml_obj, data, default):
        """Appends attribute data to edges or nodes, and stores type information
        to be added later. See add_graph_element.
        """
        for k, v in data.items():
            self.attribute_types[(str(k), scope)].add(type(v))
            self.attributes[xml_obj].append([k, v, scope, default.get(k)])

    def add_nodes(self, G, graph_element):
        """Append one ``<node>`` element per node of `G` to `graph_element`."""
        default = G.graph.get("node_default", {})
        for node, data in G.nodes(data=True):
            node_element = self.myElement("node", id=str(node))
            self.add_attributes("node", node_element, data, default)
            graph_element.append(node_element)

    def add_edges(self, G, graph_element):
        """Append one ``<edge>`` element per edge of `G` to `graph_element`.

        The edge id comes from the ``edge_id_from_attribute`` data value when
        that attribute is present; otherwise multigraph edges use their edge
        key and simple-graph edges get no id.
        """
        # Loop-invariant lookups, hoisted out of the per-edge loops.
        default = G.graph.get("edge_default", {})
        id_attr = self.edge_id_from_attribute

        if G.is_multigraph():
            for u, v, key, data in G.edges(data=True, keys=True):
                if id_attr and id_attr in data:
                    edge_id = str(data.get(id_attr))
                else:
                    edge_id = str(key)
                edge_element = self.myElement(
                    "edge", source=str(u), target=str(v), id=edge_id
                )
                self.add_attributes("edge", edge_element, data, default)
                graph_element.append(edge_element)
        else:
            for u, v, data in G.edges(data=True):
                if id_attr and id_attr in data:
                    # select attribute to be edge id
                    edge_element = self.myElement(
                        "edge",
                        source=str(u),
                        target=str(v),
                        id=str(data.get(id_attr)),
                    )
                else:
                    # default: no edge id
                    edge_element = self.myElement("edge", source=str(u), target=str(v))
                self.add_attributes("edge", edge_element, data, default)
                graph_element.append(edge_element)

    def add_graph_element(self, G):
        """
        Serialize graph G in GraphML and append it to the document.

        ``G.graph["id"]``, if present and not None, becomes the ``id`` of the
        ``<graph>`` element and is not written as a data attribute. `G` is
        not modified.
        """
        if G.is_directed():
            default_edge_type = "directed"
        else:
            default_edge_type = "undirected"

        # Read, don't pop: popping would strip the id from the caller's graph
        # and lose it on any later write of the same graph.
        graphid = G.graph.get("id")
        if graphid is None:
            graph_element = self.myElement("graph", edgedefault=default_edge_type)
        else:
            graph_element = self.myElement(
                "graph", edgedefault=default_edge_type, id=graphid
            )
        default = {}
        data = {
            k: v
            for (k, v) in G.graph.items()
            if k not in ("node_default", "edge_default", "id")
        }
        self.add_attributes("graph", graph_element, data, default)
        self.add_nodes(G, graph_element)
        self.add_edges(G, graph_element)

        # self.attributes contains a mapping from XML Objects to a list of
        # data that needs to be added to them.
        # We postpone processing in order to do type inference/generalization.
        # See self.attr_type
        for xml_obj, data in self.attributes.items():
            for k, v, scope, default in data:
                xml_obj.append(
                    self.add_data(
                        str(k), self.attr_type(k, scope, v), str(v), scope, default
                    )
                )
        # The pending records are now in the tree. Drop them so that adding
        # another graph does not append the same <data> elements again.
        self.attributes.clear()
        self.xml.append(graph_element)

    def add_graphs(self, graph_list):
        """Add many graphs to this GraphML document."""
        for G in graph_list:
            self.add_graph_element(G)

    def dump(self, stream):
        """Write the document, with an XML declaration, to `stream`."""
        from xml.etree.ElementTree import ElementTree

        if self.prettyprint:
            self.indent(self.xml)
        document = ElementTree(self.xml)
        document.write(stream, encoding=self.encoding, xml_declaration=True)

    def indent(self, elem, level=0):
        """Indent `elem` and its descendants in place for pretty printing."""
        # NOTE(review): the indent unit below is copied verbatim from the
        # reviewed text (one space per level); confirm the intended width.
        i = "\n" + level * " "
        if len(elem):
            if not elem.text or not elem.text.strip():
                elem.text = i + " "
            if not elem.tail or not elem.tail.strip():
                elem.tail = i
            # The loop deliberately rebinds `elem`: once it finishes, `elem`
            # is the last child, whose tail is set to this level's indent.
            for elem in elem:
                self.indent(elem, level + 1)
            if not elem.tail or not elem.tail.strip():
                elem.tail = i
        else:
            if level and (not elem.tail or not elem.tail.strip()):
                elem.tail = i
class IncrementalElement:
    """Adapter giving an incremental XML writer an Element-style ``append``.

    Only the calls used by GraphMLWriter are provided; this is not a
    complete Element implementation.
    """

    def __init__(self, xml, prettyprint):
        # `xml` is the object whose ``write`` method receives every element.
        self.xml, self.prettyprint = xml, prettyprint

    def append(self, element):
        """Write `element` straight to the wrapped writer."""
        sink = self.xml
        sink.write(element, pretty_print=self.prettyprint)
class GraphMLWriterLxml(GraphMLWriter):
    """Stream a GraphML document to `path` with ``lxml.etree.xmlfile``.

    The XML declaration and the opening ``<graphml>`` tag are written when
    the writer is constructed. Each call to `add_graph_element` writes the
    key definitions followed by one ``<graph>`` element, and `dump` closes
    the document.

    Parameters
    ----------
    path : file or string
        Target handed to ``lxml.etree.xmlfile``.
    graph : NetworkX graph, optional
        If given, it is written immediately.
    encoding : string (default: "utf-8")
        Encoding handed to ``lxml.etree.xmlfile``.
    prettyprint : bool (default: True)
        Passed as ``pretty_print`` on every incremental write.
    infer_numeric_types : bool (default: False)
        If True, use the most general type seen for each (name, scope) pair.
    named_key_ids : bool (default: False)
        If True use the attribute name as the ``id`` of each key element.
    edge_id_from_attribute : dict key, optional
        Edge data attribute whose value is used as the edge ``id``.
    """

    def __init__(
        self,
        path,
        graph=None,
        encoding="utf-8",
        prettyprint=True,
        infer_numeric_types=False,
        named_key_ids=False,
        edge_id_from_attribute=None,
    ):
        self.construct_types()
        import lxml.etree as lxmletree

        self.myElement = lxmletree.Element

        self._encoding = encoding
        self._prettyprint = prettyprint
        self.named_key_ids = named_key_ids
        self.edge_id_from_attribute = edge_id_from_attribute
        self.infer_numeric_types = infer_numeric_types

        # The context managers are entered by hand here and exited in `dump`.
        self._xml_base = lxmletree.xmlfile(path, encoding=encoding)
        self._xml = self._xml_base.__enter__()
        self._xml.write_declaration()

        # We need to have a xml variable that support insertion. This call is
        # used for adding the keys to the document.
        # We will store those keys in a plain list, and then after the graph
        # element is closed we will add them to the main graphml element.
        self.xml = []
        self._keys = self.xml
        self._graphml = self._xml.element(
            "graphml",
            {
                "xmlns": self.NS_GRAPHML,
                "xmlns:xsi": self.NS_XSI,
                "xsi:schemaLocation": self.SCHEMALOCATION,
            },
        )
        self._graphml.__enter__()
        self.keys = {}
        self.attribute_types = defaultdict(set)

        if graph is not None:
            self.add_graph_element(graph)

    def add_graph_element(self, G):
        """
        Serialize graph G in GraphML to the stream.

        ``G.graph["id"]``, if present and not None, becomes the ``id`` of the
        ``<graph>`` element and is not written as a data attribute. `G` is
        not modified.
        """
        if G.is_directed():
            default_edge_type = "directed"
        else:
            default_edge_type = "undirected"

        # Read, don't pop: popping would strip the id from the caller's graph
        # and lose it on any later write of the same graph.
        graphid = G.graph.get("id")
        if graphid is None:
            graph_element = self._xml.element("graph", edgedefault=default_edge_type)
        else:
            graph_element = self._xml.element(
                "graph", edgedefault=default_edge_type, id=graphid
            )

        # gather attributes types for the whole graph
        # to find the most general numeric format needed.
        # Then pass through attributes to create key_id for each.
        graphdata = {
            k: v
            for k, v in G.graph.items()
            if k not in ("node_default", "edge_default", "id")
        }
        node_default = G.graph.get("node_default", {})
        edge_default = G.graph.get("edge_default", {})

        # Types are recorded under str(name), so every attr_type lookup below
        # passes str(name) as well; this keeps non-string attribute names
        # working when infer_numeric_types is on.

        # Graph attributes
        for name, value in graphdata.items():
            self.attribute_types[(str(name), "graph")].add(type(value))
        for name, value in graphdata.items():
            element_type = self.get_xml_type(self.attr_type(str(name), "graph", value))
            self.get_key(str(name), element_type, "graph", None)

        # Nodes and data
        for _, node_data in G.nodes(data=True):
            for name, value in node_data.items():
                self.attribute_types[(str(name), "node")].add(type(value))
        for _, node_data in G.nodes(data=True):
            for name, value in node_data.items():
                T = self.get_xml_type(self.attr_type(str(name), "node", value))
                self.get_key(str(name), T, "node", node_default.get(name))

        # Edges and data. Edge keys are not needed to collect attribute
        # types, so one loop serves both simple graphs and multigraphs.
        for _, _, edge_data in G.edges(data=True):
            for name, value in edge_data.items():
                self.attribute_types[(str(name), "edge")].add(type(value))
        for _, _, edge_data in G.edges(data=True):
            for name, value in edge_data.items():
                T = self.get_xml_type(self.attr_type(str(name), "edge", value))
                self.get_key(str(name), T, "edge", edge_default.get(name))

        # Now add attribute keys to the xml file
        for key in self.xml:
            self._xml.write(key, pretty_print=self._prettyprint)

        # The incremental_writer writes each node/edge as it is created
        incremental_writer = IncrementalElement(self._xml, self._prettyprint)
        with graph_element:
            self.add_attributes("graph", incremental_writer, graphdata, {})
            self.add_nodes(G, incremental_writer)  # adds attributes too
            self.add_edges(G, incremental_writer)  # adds attributes too

    def add_attributes(self, scope, xml_obj, data, default):
        """Appends attribute data."""
        for k, v in data.items():
            data_element = self.add_data(
                str(k), self.attr_type(str(k), scope, v), str(v), scope, default.get(k)
            )
            xml_obj.append(data_element)

    def __str__(self):
        """Return the default object representation.

        The document is streamed to `path`, so there is no in-memory tree to
        serialize as the parent class does.
        """
        return object.__str__(self)

    def dump(self):
        """Close the ``<graphml>`` element and then the underlying xmlfile."""
        self._graphml.__exit__(None, None, None)
        self._xml_base.__exit__(None, None, None)
# Default writer: the lxml-based implementation, which itself falls back to
# `write_graphml_xml` when lxml cannot be imported.
write_graphml = write_graphml_lxml
class GraphMLReader(GraphML):
    """Read a GraphML document. Produces NetworkX graph objects.

    Parameters
    ----------
    node_type : Python type (default: str)
        Callable applied to node ids (and edge source/target ids).
    edge_key_type : Python type (default: int)
        Callable applied to edge ids; ids that raise ValueError are kept
        unconverted.
    force_multigraph : bool (default: False)
        If True, always return a multigraph.
    """

    def __init__(self, node_type=str, edge_key_type=int, force_multigraph=False):
        self.construct_types()
        self.node_type = node_type
        self.edge_key_type = edge_key_type
        self.multigraph = force_multigraph  # If False, test for multiedges
        self.edge_ids = {}  # dict mapping (u,v) tuples to edge id attributes

    def __call__(self, path=None, string=None):
        """Parse a document and yield one graph per ``<graph>`` element.

        Exactly one source is used: `path` (a file) takes precedence over
        `string`.

        Raises
        ------
        ValueError
            If neither `path` nor `string` is given.
        """
        from xml.etree.ElementTree import ElementTree, fromstring

        if path is not None:
            self.xml = ElementTree(file=path)
        elif string is not None:
            self.xml = fromstring(string)
        else:
            raise ValueError("Must specify either 'path' or 'string' as kwarg")
        (keys, defaults) = self.find_graphml_keys(self.xml)
        for g in self.xml.findall(f"{{{self.NS_GRAPHML}}}graph"):
            yield self.make_graph(g, keys, defaults)

    def make_graph(self, graph_xml, graphml_keys, defaults, G=None):
        """Build a graph from a ``<graph>`` element.

        When `G` is given (nested yEd group graphs) the nodes and edges are
        added to it. Otherwise a new multigraph is created, directed if
        ``edgedefault="directed"``. Unless a multigraph is required, the
        result is converted to Graph/DiGraph and the explicit edge ids from
        the file are stored in the edge attribute "id".

        Raises
        ------
        NetworkXError
            If the graph contains a ``<hyperedge>`` element.
        """
        # set default graph type
        edgedefault = graph_xml.get("edgedefault", None)
        if G is None:
            if edgedefault == "directed":
                G = nx.MultiDiGraph()
            else:
                G = nx.MultiGraph()
        # set defaults for graph attributes
        G.graph["node_default"] = {}
        G.graph["edge_default"] = {}
        for key_id, value in defaults.items():
            key_for = graphml_keys[key_id]["for"]
            name = graphml_keys[key_id]["name"]
            python_type = graphml_keys[key_id]["type"]
            if key_for == "node":
                G.graph["node_default"].update({name: python_type(value)})
            if key_for == "edge":
                G.graph["edge_default"].update({name: python_type(value)})
        # hyperedges are not supported
        hyperedge = graph_xml.find(f"{{{self.NS_GRAPHML}}}hyperedge")
        if hyperedge is not None:
            raise nx.NetworkXError("GraphML reader doesn't support hyperedges")
        # add nodes
        for node_xml in graph_xml.findall(f"{{{self.NS_GRAPHML}}}node"):
            self.add_node(G, node_xml, graphml_keys, defaults)
        # add edges
        for edge_xml in graph_xml.findall(f"{{{self.NS_GRAPHML}}}edge"):
            self.add_edge(G, edge_xml, graphml_keys)
        # add graph data
        data = self.decode_data_elements(graphml_keys, graph_xml)
        G.graph.update(data)

        # switch to Graph or DiGraph if no parallel edges were found
        if self.multigraph:
            return G

        G = nx.DiGraph(G) if G.is_directed() else nx.Graph(G)
        # add explicit edge "id" from file as attribute in NX graph.
        nx.set_edge_attributes(G, values=self.edge_ids, name="id")
        return G

    def add_node(self, G, node_xml, graphml_keys, defaults):
        """Add a node to the graph."""
        # warn on finding unsupported ports tag
        ports = node_xml.find(f"{{{self.NS_GRAPHML}}}port")
        if ports is not None:
            warnings.warn("GraphML port tag not supported.")
        # find the node by id and cast it to the appropriate type
        node_id = self.node_type(node_xml.get("id"))
        # get data/attributes for node
        data = self.decode_data_elements(graphml_keys, node_xml)
        G.add_node(node_id, **data)
        # get child nodes
        if node_xml.attrib.get("yfiles.foldertype") == "group":
            graph_xml = node_xml.find(f"{{{self.NS_GRAPHML}}}graph")
            # A group node may have no nested <graph>; there is then nothing
            # to recurse into.
            if graph_xml is not None:
                self.make_graph(graph_xml, graphml_keys, defaults, G)

    def add_edge(self, G, edge_element, graphml_keys):
        """Add an edge to the graph.

        Raises
        ------
        NetworkXError
            If the edge's ``directed`` attribute contradicts the graph type.
        """
        # warn on finding unsupported ports tag
        ports = edge_element.find(f"{{{self.NS_GRAPHML}}}port")
        if ports is not None:
            warnings.warn("GraphML port tag not supported.")

        # raise error if we find mixed directed and undirected edges
        directed = edge_element.get("directed")
        if G.is_directed() and directed == "false":
            msg = "directed=false edge found in directed graph."
            raise nx.NetworkXError(msg)
        if (not G.is_directed()) and directed == "true":
            msg = "directed=true edge found in undirected graph."
            raise nx.NetworkXError(msg)

        source = self.node_type(edge_element.get("source"))
        target = self.node_type(edge_element.get("target"))
        data = self.decode_data_elements(graphml_keys, edge_element)
        # GraphML stores edge ids as an attribute
        # NetworkX uses them as keys in multigraphs too if no key
        # attribute is specified
        edge_id = edge_element.get("id")
        if edge_id:
            # self.edge_ids is used by `make_graph` method for non-multigraphs
            self.edge_ids[source, target] = edge_id
            try:
                edge_id = self.edge_key_type(edge_id)
            except ValueError:  # Could not convert.
                pass
        else:
            edge_id = data.get("key")

        if G.has_edge(source, target):
            # mark this as a multigraph
            self.multigraph = True

        # Use add_edges_from to avoid error with add_edge when `'key' in data`
        # Note there is only one edge here...
        G.add_edges_from([(source, target, edge_id, data)])

    def decode_data_elements(self, graphml_keys, obj_xml):
        """Use the key information to decode the data XML if present.

        Returns a dict of attribute name -> decoded value for the ``<data>``
        children of `obj_xml`. Data elements with sub-elements are treated as
        yfiles extensions, from which the label, geometry and shape type are
        extracted.

        Raises
        ------
        NetworkXError
            If a data element refers to a key that is not in `graphml_keys`.
        """
        data = {}
        for data_element in obj_xml.findall(f"{{{self.NS_GRAPHML}}}data"):
            key = data_element.get("key")
            try:
                data_name = graphml_keys[key]["name"]
                data_type = graphml_keys[key]["type"]
            except KeyError as err:
                raise nx.NetworkXError(f"Bad GraphML data: no key {key}") from err
            text = data_element.text
            has_children = len(data_element) > 0
            # assume anything with subelements is a yfiles extension
            if text is not None and not has_children:
                if data_type == bool:
                    # Ignore cases.
                    # http://docs.oracle.com/javase/6/docs/api/java/lang/
                    # Boolean.html#parseBoolean%28java.lang.String%29
                    data[data_name] = self.convert_bool[text.lower()]
                else:
                    data[data_name] = data_type(text)
            elif has_children:
                # Assume yfiles as subelements, try to extract node_label
                node_label = None
                # set GenericNode's configuration as shape type
                gn = data_element.find(f"{{{self.NS_Y}}}GenericNode")
                # Test against None explicitly: an Element with no children
                # is falsy, so `if gn:` would skip a childless GenericNode.
                if gn is not None:
                    data["shape_type"] = gn.get("configuration")
                for node_type in ["GenericNode", "ShapeNode", "SVGNode", "ImageNode"]:
                    pref = f"{{{self.NS_Y}}}{node_type}/{{{self.NS_Y}}}"
                    geometry = data_element.find(f"{pref}Geometry")
                    if geometry is not None:
                        data["x"] = geometry.get("x")
                        data["y"] = geometry.get("y")
                    if node_label is None:
                        node_label = data_element.find(f"{pref}NodeLabel")
                    shape = data_element.find(f"{pref}Shape")
                    if shape is not None:
                        data["shape_type"] = shape.get("type")
                if node_label is not None:
                    data["label"] = node_label.text

                # check all the different types of edges available in yEd.
                for edge_type in [
                    "PolyLineEdge",
                    "SplineEdge",
                    "QuadCurveEdge",
                    "BezierEdge",
                    "ArcEdge",
                ]:
                    pref = f"{{{self.NS_Y}}}{edge_type}/{{{self.NS_Y}}}"
                    edge_label = data_element.find(f"{pref}EdgeLabel")
                    if edge_label is not None:
                        break

                if edge_label is not None:
                    data["label"] = edge_label.text
        return data

    def find_graphml_keys(self, graph_element):
        """Extracts all the keys and key defaults from the xml.

        Returns
        -------
        graphml_keys : dict
            key id -> ``{"name": ..., "type": <python type>, "for": ...}``
        graphml_key_defaults : dict
            key id -> decoded default value, for keys with a ``<default>``.

        Raises
        ------
        NetworkXError
            If a key has neither ``attr.name`` nor ``yfiles.type``.
        """
        graphml_keys = {}
        graphml_key_defaults = {}
        for k in graph_element.findall(f"{{{self.NS_GRAPHML}}}key"):
            attr_id = k.get("id")
            attr_type = k.get("attr.type")
            attr_name = k.get("attr.name")
            yfiles_type = k.get("yfiles.type")
            if yfiles_type is not None:
                attr_name = yfiles_type
                attr_type = "yfiles"
            if attr_type is None:
                attr_type = "string"
                warnings.warn(f"No key type for id {attr_id}. Using string")
            if attr_name is None:
                raise nx.NetworkXError(f"Unknown key for id {attr_id}.")
            graphml_keys[attr_id] = {
                "name": attr_name,
                "type": self.python_type[attr_type],
                "for": k.get("for"),
            }
            # check for "default" sub-element of key element
            default = k.find(f"{{{self.NS_GRAPHML}}}default")
            if default is not None:
                # Handle default values identically to data element values
                python_type = graphml_keys[attr_id]["type"]
                if python_type == bool:
                    graphml_key_defaults[attr_id] = self.convert_bool[
                        default.text.lower()
                    ]
                else:
                    graphml_key_defaults[attr_id] = python_type(default.text)
        return graphml_keys, graphml_key_defaults