1"""Algorithms for directed acyclic graphs (DAGs).
2
3Note that most of these functions are only guaranteed to work for DAGs.
4In general, these functions do not check for acyclic-ness, so it is up
5to the user to check for that.
6"""
7
8import heapq
9from collections import deque
10from functools import partial
11from itertools import chain, combinations, product, starmap
12from math import gcd
13
14import networkx as nx
15from networkx.utils import arbitrary_element, not_implemented_for, pairwise
16
17__all__ = [
18 "descendants",
19 "ancestors",
20 "topological_sort",
21 "lexicographical_topological_sort",
22 "all_topological_sorts",
23 "topological_generations",
24 "is_directed_acyclic_graph",
25 "is_aperiodic",
26 "transitive_closure",
27 "transitive_closure_dag",
28 "transitive_reduction",
29 "antichains",
30 "dag_longest_path",
31 "dag_longest_path_length",
32 "dag_to_branching",
33]
34
35chaini = chain.from_iterable
36
37
38@nx._dispatchable
39def descendants(G, source):
40 """Returns all nodes reachable from `source` in `G`.
41
42 Parameters
43 ----------
44 G : NetworkX Graph
45 source : node in `G`
46
47 Returns
48 -------
49 set()
50 The descendants of `source` in `G`
51
52 Raises
53 ------
54 NetworkXError
55 If node `source` is not in `G`.
56
57 Examples
58 --------
59 >>> DG = nx.path_graph(5, create_using=nx.DiGraph)
60 >>> sorted(nx.descendants(DG, 2))
61 [3, 4]
62
63 The `source` node is not a descendant of itself, but can be included manually:
64
65 >>> sorted(nx.descendants(DG, 2) | {2})
66 [2, 3, 4]
67
68 See also
69 --------
70 ancestors
71 """
72 return {child for parent, child in nx.bfs_edges(G, source)}
73
74
75@nx._dispatchable
76def ancestors(G, source):
77 """Returns all nodes having a path to `source` in `G`.
78
79 Parameters
80 ----------
81 G : NetworkX Graph
82 source : node in `G`
83
84 Returns
85 -------
86 set()
87 The ancestors of `source` in `G`
88
89 Raises
90 ------
91 NetworkXError
92 If node `source` is not in `G`.
93
94 Examples
95 --------
96 >>> DG = nx.path_graph(5, create_using=nx.DiGraph)
97 >>> sorted(nx.ancestors(DG, 2))
98 [0, 1]
99
100 The `source` node is not an ancestor of itself, but can be included manually:
101
102 >>> sorted(nx.ancestors(DG, 2) | {2})
103 [0, 1, 2]
104
105 See also
106 --------
107 descendants
108 """
109 return {child for parent, child in nx.bfs_edges(G, source, reverse=True)}
110
111
112@nx._dispatchable
113def has_cycle(G):
114 """Decides whether the directed graph has a cycle."""
115 try:
116 # Feed the entire iterator into a zero-length deque.
117 deque(topological_sort(G), maxlen=0)
118 except nx.NetworkXUnfeasible:
119 return True
120 else:
121 return False
122
123
124@nx._dispatchable
125def is_directed_acyclic_graph(G):
126 """Returns True if the graph `G` is a directed acyclic graph (DAG) or
127 False if not.
128
129 Parameters
130 ----------
131 G : NetworkX graph
132
133 Returns
134 -------
135 bool
136 True if `G` is a DAG, False otherwise
137
138 Examples
139 --------
140 Undirected graph::
141
142 >>> G = nx.Graph([(1, 2), (2, 3)])
143 >>> nx.is_directed_acyclic_graph(G)
144 False
145
146 Directed graph with cycle::
147
148 >>> G = nx.DiGraph([(1, 2), (2, 3), (3, 1)])
149 >>> nx.is_directed_acyclic_graph(G)
150 False
151
152 Directed acyclic graph::
153
154 >>> G = nx.DiGraph([(1, 2), (2, 3)])
155 >>> nx.is_directed_acyclic_graph(G)
156 True
157
158 See also
159 --------
160 topological_sort
161 """
162 return G.is_directed() and not has_cycle(G)
163
164
165@nx._dispatchable
166def topological_generations(G):
167 """Stratifies a DAG into generations.
168
169 A topological generation is node collection in which ancestors of a node in each
170 generation are guaranteed to be in a previous generation, and any descendants of
171 a node are guaranteed to be in a following generation. Nodes are guaranteed to
172 be in the earliest possible generation that they can belong to.
173
174 Parameters
175 ----------
176 G : NetworkX digraph
177 A directed acyclic graph (DAG)
178
179 Yields
180 ------
181 sets of nodes
182 Yields sets of nodes representing each generation.
183
184 Raises
185 ------
186 NetworkXError
187 Generations are defined for directed graphs only. If the graph
188 `G` is undirected, a :exc:`NetworkXError` is raised.
189
190 NetworkXUnfeasible
191 If `G` is not a directed acyclic graph (DAG) no topological generations
192 exist and a :exc:`NetworkXUnfeasible` exception is raised. This can also
193 be raised if `G` is changed while the returned iterator is being processed
194
195 RuntimeError
196 If `G` is changed while the returned iterator is being processed.
197
198 Examples
199 --------
200 >>> DG = nx.DiGraph([(2, 1), (3, 1)])
201 >>> [sorted(generation) for generation in nx.topological_generations(DG)]
202 [[2, 3], [1]]
203
204 Notes
205 -----
206 The generation in which a node resides can also be determined by taking the
207 max-path-distance from the node to the farthest leaf node. That value can
208 be obtained with this function using `enumerate(topological_generations(G))`.
209
210 See also
211 --------
212 topological_sort
213 """
214 if not G.is_directed():
215 raise nx.NetworkXError("Topological sort not defined on undirected graphs.")
216
217 multigraph = G.is_multigraph()
218 indegree_map = {v: d for v, d in G.in_degree() if d > 0}
219 zero_indegree = [v for v, d in G.in_degree() if d == 0]
220
221 while zero_indegree:
222 this_generation = zero_indegree
223 zero_indegree = []
224 for node in this_generation:
225 if node not in G:
226 raise RuntimeError("Graph changed during iteration")
227 for child in G.neighbors(node):
228 try:
229 indegree_map[child] -= len(G[node][child]) if multigraph else 1
230 except KeyError as err:
231 raise RuntimeError("Graph changed during iteration") from err
232 if indegree_map[child] == 0:
233 zero_indegree.append(child)
234 del indegree_map[child]
235 yield this_generation
236
237 if indegree_map:
238 raise nx.NetworkXUnfeasible(
239 "Graph contains a cycle or graph changed during iteration"
240 )
241
242
243@nx._dispatchable
244def topological_sort(G):
245 """Returns a generator of nodes in topologically sorted order.
246
247 A topological sort is a nonunique permutation of the nodes of a
248 directed graph such that an edge from u to v implies that u
249 appears before v in the topological sort order. This ordering is
250 valid only if the graph has no directed cycles.
251
252 Parameters
253 ----------
254 G : NetworkX digraph
255 A directed acyclic graph (DAG)
256
257 Yields
258 ------
259 nodes
260 Yields the nodes in topological sorted order.
261
262 Raises
263 ------
264 NetworkXError
265 Topological sort is defined for directed graphs only. If the graph `G`
266 is undirected, a :exc:`NetworkXError` is raised.
267
268 NetworkXUnfeasible
269 If `G` is not a directed acyclic graph (DAG) no topological sort exists
270 and a :exc:`NetworkXUnfeasible` exception is raised. This can also be
271 raised if `G` is changed while the returned iterator is being processed
272
273 RuntimeError
274 If `G` is changed while the returned iterator is being processed.
275
276 Examples
277 --------
278 To get the reverse order of the topological sort:
279
280 >>> DG = nx.DiGraph([(1, 2), (2, 3)])
281 >>> list(reversed(list(nx.topological_sort(DG))))
282 [3, 2, 1]
283
284 If your DiGraph naturally has the edges representing tasks/inputs
285 and nodes representing people/processes that initiate tasks, then
286 topological_sort is not quite what you need. You will have to change
287 the tasks to nodes with dependence reflected by edges. The result is
288 a kind of topological sort of the edges. This can be done
289 with :func:`networkx.line_graph` as follows:
290
291 >>> list(nx.topological_sort(nx.line_graph(DG)))
292 [(1, 2), (2, 3)]
293
294 Notes
295 -----
296 This algorithm is based on a description and proof in
297 "Introduction to Algorithms: A Creative Approach" [1]_ .
298
299 See also
300 --------
301 is_directed_acyclic_graph, lexicographical_topological_sort
302
303 References
304 ----------
305 .. [1] Manber, U. (1989).
306 *Introduction to Algorithms - A Creative Approach.* Addison-Wesley.
307 """
308 for generation in nx.topological_generations(G):
309 yield from generation
310
311
312@nx._dispatchable
313def lexicographical_topological_sort(G, key=None):
314 """Generate the nodes in the unique lexicographical topological sort order.
315
316 Generates a unique ordering of nodes by first sorting topologically (for which there are often
317 multiple valid orderings) and then additionally by sorting lexicographically.
318
319 A topological sort arranges the nodes of a directed graph so that the
320 upstream node of each directed edge precedes the downstream node.
321 It is always possible to find a solution for directed graphs that have no cycles.
322 There may be more than one valid solution.
323
324 Lexicographical sorting is just sorting alphabetically. It is used here to break ties in the
325 topological sort and to determine a single, unique ordering. This can be useful in comparing
326 sort results.
327
328 The lexicographical order can be customized by providing a function to the `key=` parameter.
329 The definition of the key function is the same as used in python's built-in `sort()`.
330 The function takes a single argument and returns a key to use for sorting purposes.
331
332 Lexicographical sorting can fail if the node names are un-sortable. See the example below.
333 The solution is to provide a function to the `key=` argument that returns sortable keys.
334
335
336 Parameters
337 ----------
338 G : NetworkX digraph
339 A directed acyclic graph (DAG)
340
341 key : function, optional
342 A function of one argument that converts a node name to a comparison key.
343 It defines and resolves ambiguities in the sort order. Defaults to the identity function.
344
345 Yields
346 ------
347 nodes
348 Yields the nodes of G in lexicographical topological sort order.
349
350 Raises
351 ------
352 NetworkXError
353 Topological sort is defined for directed graphs only. If the graph `G`
354 is undirected, a :exc:`NetworkXError` is raised.
355
356 NetworkXUnfeasible
357 If `G` is not a directed acyclic graph (DAG) no topological sort exists
358 and a :exc:`NetworkXUnfeasible` exception is raised. This can also be
359 raised if `G` is changed while the returned iterator is being processed
360
361 RuntimeError
362 If `G` is changed while the returned iterator is being processed.
363
364 TypeError
365 Results from un-sortable node names.
366 Consider using `key=` parameter to resolve ambiguities in the sort order.
367
368 Examples
369 --------
370 >>> DG = nx.DiGraph([(2, 1), (2, 5), (1, 3), (1, 4), (5, 4)])
371 >>> list(nx.lexicographical_topological_sort(DG))
372 [2, 1, 3, 5, 4]
373 >>> list(nx.lexicographical_topological_sort(DG, key=lambda x: -x))
374 [2, 5, 1, 4, 3]
375
376 The sort will fail for any graph with integer and string nodes. Comparison of integer to strings
377 is not defined in python. Is 3 greater or less than 'red'?
378
379 >>> DG = nx.DiGraph([(1, "red"), (3, "red"), (1, "green"), (2, "blue")])
380 >>> list(nx.lexicographical_topological_sort(DG))
381 Traceback (most recent call last):
382 ...
383 TypeError: '<' not supported between instances of 'str' and 'int'
384 ...
385
386 Incomparable nodes can be resolved using a `key` function. This example function
387 allows comparison of integers and strings by returning a tuple where the first
388 element is True for `str`, False otherwise. The second element is the node name.
389 This groups the strings and integers separately so they can be compared only among themselves.
390
391 >>> key = lambda node: (isinstance(node, str), node)
392 >>> list(nx.lexicographical_topological_sort(DG, key=key))
393 [1, 2, 3, 'blue', 'green', 'red']
394
395 Notes
396 -----
397 This algorithm is based on a description and proof in
398 "Introduction to Algorithms: A Creative Approach" [1]_ .
399
400 See also
401 --------
402 topological_sort
403
404 References
405 ----------
406 .. [1] Manber, U. (1989).
407 *Introduction to Algorithms - A Creative Approach.* Addison-Wesley.
408 """
409 if not G.is_directed():
410 msg = "Topological sort not defined on undirected graphs."
411 raise nx.NetworkXError(msg)
412
413 if key is None:
414
415 def key(node):
416 return node
417
418 nodeid_map = {n: i for i, n in enumerate(G)}
419
420 def create_tuple(node):
421 return key(node), nodeid_map[node], node
422
423 indegree_map = {v: d for v, d in G.in_degree() if d > 0}
424 # These nodes have zero indegree and ready to be returned.
425 zero_indegree = [create_tuple(v) for v, d in G.in_degree() if d == 0]
426 heapq.heapify(zero_indegree)
427
428 while zero_indegree:
429 _, _, node = heapq.heappop(zero_indegree)
430
431 if node not in G:
432 raise RuntimeError("Graph changed during iteration")
433 for _, child in G.edges(node):
434 try:
435 indegree_map[child] -= 1
436 except KeyError as err:
437 raise RuntimeError("Graph changed during iteration") from err
438 if indegree_map[child] == 0:
439 try:
440 heapq.heappush(zero_indegree, create_tuple(child))
441 except TypeError as err:
442 raise TypeError(
443 f"{err}\nConsider using `key=` parameter to resolve ambiguities in the sort order."
444 )
445 del indegree_map[child]
446
447 yield node
448
449 if indegree_map:
450 msg = "Graph contains a cycle or graph changed during iteration"
451 raise nx.NetworkXUnfeasible(msg)
452
453
454@not_implemented_for("undirected")
455@nx._dispatchable
456def all_topological_sorts(G):
457 """Returns a generator of _all_ topological sorts of the directed graph G.
458
459 A topological sort is a nonunique permutation of the nodes such that an
460 edge from u to v implies that u appears before v in the topological sort
461 order.
462
463 Parameters
464 ----------
465 G : NetworkX DiGraph
466 A directed graph
467
468 Yields
469 ------
470 topological_sort_order : list
471 a list of nodes in `G`, representing one of the topological sort orders
472
473 Raises
474 ------
475 NetworkXNotImplemented
476 If `G` is not directed
477 NetworkXUnfeasible
478 If `G` is not acyclic
479
480 Examples
481 --------
482 To enumerate all topological sorts of directed graph:
483
484 >>> DG = nx.DiGraph([(1, 2), (2, 3), (2, 4)])
485 >>> list(nx.all_topological_sorts(DG))
486 [[1, 2, 4, 3], [1, 2, 3, 4]]
487
488 Notes
489 -----
490 Implements an iterative version of the algorithm given in [1].
491
492 References
493 ----------
494 .. [1] Knuth, Donald E., Szwarcfiter, Jayme L. (1974).
495 "A Structured Program to Generate All Topological Sorting Arrangements"
496 Information Processing Letters, Volume 2, Issue 6, 1974, Pages 153-157,
497 ISSN 0020-0190,
498 https://doi.org/10.1016/0020-0190(74)90001-5.
499 Elsevier (North-Holland), Amsterdam
500 """
501 if not G.is_directed():
502 raise nx.NetworkXError("Topological sort not defined on undirected graphs.")
503
504 # the names of count and D are chosen to match the global variables in [1]
505 # number of edges originating in a vertex v
506 count = dict(G.in_degree())
507 # vertices with indegree 0
508 D = deque([v for v, d in G.in_degree() if d == 0])
509 # stack of first value chosen at a position k in the topological sort
510 bases = []
511 current_sort = []
512
513 # do-while construct
514 while True:
515 assert all(count[v] == 0 for v in D)
516
517 if len(current_sort) == len(G):
518 yield list(current_sort)
519
520 # clean-up stack
521 while len(current_sort) > 0:
522 assert len(bases) == len(current_sort)
523 q = current_sort.pop()
524
525 # "restores" all edges (q, x)
526 # NOTE: it is important to iterate over edges instead
527 # of successors, so count is updated correctly in multigraphs
528 for _, j in G.out_edges(q):
529 count[j] += 1
530 assert count[j] >= 0
531 # remove entries from D
532 while len(D) > 0 and count[D[-1]] > 0:
533 D.pop()
534
535 # corresponds to a circular shift of the values in D
536 # if the first value chosen (the base) is in the first
537 # position of D again, we are done and need to consider the
538 # previous condition
539 D.appendleft(q)
540 if D[-1] == bases[-1]:
541 # all possible values have been chosen at current position
542 # remove corresponding marker
543 bases.pop()
544 else:
545 # there are still elements that have not been fixed
546 # at the current position in the topological sort
547 # stop removing elements, escape inner loop
548 break
549
550 else:
551 if len(D) == 0:
552 raise nx.NetworkXUnfeasible("Graph contains a cycle.")
553
554 # choose next node
555 q = D.pop()
556 # "erase" all edges (q, x)
557 # NOTE: it is important to iterate over edges instead
558 # of successors, so count is updated correctly in multigraphs
559 for _, j in G.out_edges(q):
560 count[j] -= 1
561 assert count[j] >= 0
562 if count[j] == 0:
563 D.append(j)
564 current_sort.append(q)
565
566 # base for current position might _not_ be fixed yet
567 if len(bases) < len(current_sort):
568 bases.append(q)
569
570 if len(bases) == 0:
571 break
572
573
574@nx._dispatchable
575def is_aperiodic(G):
576 """Returns True if `G` is aperiodic.
577
578 A strongly connected directed graph is aperiodic if there is no integer ``k > 1``
579 that divides the length of every cycle in the graph.
580
581 This function requires the graph `G` to be strongly connected and will raise
582 an error if it's not. For graphs that are not strongly connected, you should
583 first identify their strongly connected components
584 (using :func:`~networkx.algorithms.components.strongly_connected_components`)
585 or attracting components
586 (using :func:`~networkx.algorithms.components.attracting_components`),
587 and then apply this function to those individual components.
588
589 Parameters
590 ----------
591 G : NetworkX DiGraph
592 A directed graph
593
594 Returns
595 -------
596 bool
597 True if the graph is aperiodic False otherwise
598
599 Raises
600 ------
601 NetworkXError
602 If `G` is not directed
603 NetworkXError
604 If `G` is not strongly connected
605 NetworkXPointlessConcept
606 If `G` has no nodes
607
608 Examples
609 --------
610 A graph consisting of one cycle, the length of which is 2. Therefore ``k = 2``
611 divides the length of every cycle in the graph and thus the graph
612 is *not aperiodic*::
613
614 >>> DG = nx.DiGraph([(1, 2), (2, 1)])
615 >>> nx.is_aperiodic(DG)
616 False
617
618 A graph consisting of two cycles: one of length 2 and the other of length 3.
619 The cycle lengths are coprime, so there is no single value of k where ``k > 1``
620 that divides each cycle length and therefore the graph is *aperiodic*::
621
622 >>> DG = nx.DiGraph([(1, 2), (2, 3), (3, 1), (1, 4), (4, 1)])
623 >>> nx.is_aperiodic(DG)
624 True
625
626 A graph created from cycles of the same length can still be aperiodic since
627 the cycles can overlap and form new cycles of different lengths. For example,
628 the following graph contains a cycle ``[4, 2, 3, 1]`` of length 4, which is coprime
629 with the explicitly added cycles of length 3, so the graph is aperiodic::
630
631 >>> DG = nx.DiGraph()
632 >>> nx.add_cycle(DG, [1, 2, 3])
633 >>> nx.add_cycle(DG, [2, 1, 4])
634 >>> nx.is_aperiodic(DG)
635 True
636
637 A single-node graph's aperiodicity depends on whether it has a self-loop:
638 it is aperiodic if a self-loop exists, and periodic otherwise::
639
640 >>> G = nx.DiGraph()
641 >>> G.add_node(1)
642 >>> nx.is_aperiodic(G)
643 False
644 >>> G.add_edge(1, 1)
645 >>> nx.is_aperiodic(G)
646 True
647
648 A Markov chain can be modeled as a directed graph, with nodes representing
649 states and edges representing transitions with non-zero probability.
650 Aperiodicity is typically considered for irreducible Markov chains,
651 which are those that are *strongly connected* as graphs.
652
653 The following Markov chain is irreducible and aperiodic, and thus
654 ergodic. It is guaranteed to have a unique stationary distribution::
655
656 >>> G = nx.DiGraph()
657 >>> nx.add_cycle(G, [1, 2, 3, 4])
658 >>> G.add_edge(1, 3)
659 >>> nx.is_aperiodic(G)
660 True
661
662 Reducible Markov chains can sometimes have a unique stationary distribution.
663 This occurs if the chain has exactly one closed communicating class and
664 that class itself is aperiodic (see [1]_). You can use
665 :func:`~networkx.algorithms.components.attracting_components`
666 to find these closed communicating classes::
667
668 >>> G = nx.DiGraph([(1, 3), (2, 3)])
669 >>> nx.add_cycle(G, [3, 4, 5, 6])
670 >>> nx.add_cycle(G, [3, 5, 6])
671 >>> communicating_classes = list(nx.strongly_connected_components(G))
672 >>> len(communicating_classes)
673 3
674 >>> closed_communicating_classes = list(nx.attracting_components(G))
675 >>> len(closed_communicating_classes)
676 1
677 >>> nx.is_aperiodic(G.subgraph(closed_communicating_classes[0]))
678 True
679
680 Notes
681 -----
682 This uses the method outlined in [1]_, which runs in $O(m)$ time
683 given $m$ edges in `G`.
684
685 References
686 ----------
687 .. [1] Jarvis, J. P.; Shier, D. R. (1996),
688 "Graph-theoretic analysis of finite Markov chains,"
689 in Shier, D. R.; Wallenius, K. T., Applied Mathematical Modeling:
690 A Multidisciplinary Approach, CRC Press.
691 """
692 if not G.is_directed():
693 raise nx.NetworkXError("is_aperiodic not defined for undirected graphs")
694 if len(G) == 0:
695 raise nx.NetworkXPointlessConcept("Graph has no nodes.")
696 if not nx.is_strongly_connected(G):
697 raise nx.NetworkXError("Graph is not strongly connected.")
698 s = arbitrary_element(G)
699 levels = {s: 0}
700 this_level = [s]
701 g = 0
702 lev = 1
703 while this_level:
704 next_level = []
705 for u in this_level:
706 for v in G[u]:
707 if v in levels: # Non-Tree Edge
708 g = gcd(g, levels[u] - levels[v] + 1)
709 else: # Tree Edge
710 next_level.append(v)
711 levels[v] = lev
712 this_level = next_level
713 lev += 1
714 return g == 1
715
716
717@nx._dispatchable(preserve_all_attrs=True, returns_graph=True)
718def transitive_closure(G, reflexive=False):
719 """Returns transitive closure of a graph
720
721 The transitive closure of G = (V,E) is a graph G+ = (V,E+) such that
722 for all v, w in V there is an edge (v, w) in E+ if and only if there
723 is a path from v to w in G.
724
725 Handling of paths from v to v has some flexibility within this definition.
726 A reflexive transitive closure creates a self-loop for the path
727 from v to v of length 0. The usual transitive closure creates a
728 self-loop only if a cycle exists (a path from v to v with length > 0).
729 We also allow an option for no self-loops.
730
731 Parameters
732 ----------
733 G : NetworkX Graph
734 A directed/undirected graph/multigraph.
735 reflexive : Bool or None, optional (default: False)
736 Determines when cycles create self-loops in the Transitive Closure.
737 If True, trivial cycles (length 0) create self-loops. The result
738 is a reflexive transitive closure of G.
739 If False (the default) non-trivial cycles create self-loops.
740 If None, self-loops are not created.
741
742 Returns
743 -------
744 NetworkX graph
745 The transitive closure of `G`
746
747 Raises
748 ------
749 NetworkXError
750 If `reflexive` not in `{None, True, False}`
751
752 Examples
753 --------
754 The treatment of trivial (i.e. length 0) cycles is controlled by the
755 `reflexive` parameter.
756
757 Trivial (i.e. length 0) cycles do not create self-loops when
758 ``reflexive=False`` (the default)::
759
760 >>> DG = nx.DiGraph([(1, 2), (2, 3)])
761 >>> TC = nx.transitive_closure(DG, reflexive=False)
762 >>> TC.edges()
763 OutEdgeView([(1, 2), (1, 3), (2, 3)])
764
765 However, nontrivial (i.e. length greater than 0) cycles create self-loops
766 when ``reflexive=False`` (the default)::
767
768 >>> DG = nx.DiGraph([(1, 2), (2, 3), (3, 1)])
769 >>> TC = nx.transitive_closure(DG, reflexive=False)
770 >>> TC.edges()
771 OutEdgeView([(1, 2), (1, 3), (1, 1), (2, 3), (2, 1), (2, 2), (3, 1), (3, 2), (3, 3)])
772
773 Trivial cycles (length 0) create self-loops when ``reflexive=True``::
774
775 >>> DG = nx.DiGraph([(1, 2), (2, 3)])
776 >>> TC = nx.transitive_closure(DG, reflexive=True)
777 >>> TC.edges()
778 OutEdgeView([(1, 2), (1, 1), (1, 3), (2, 3), (2, 2), (3, 3)])
779
780 And the third option is not to create self-loops at all when ``reflexive=None``::
781
782 >>> DG = nx.DiGraph([(1, 2), (2, 3), (3, 1)])
783 >>> TC = nx.transitive_closure(DG, reflexive=None)
784 >>> TC.edges()
785 OutEdgeView([(1, 2), (1, 3), (2, 3), (2, 1), (3, 1), (3, 2)])
786
787 References
788 ----------
789 .. [1] https://www.ics.uci.edu/~eppstein/PADS/PartialOrder.py
790 """
791 TC = G.copy()
792
793 if reflexive not in {None, True, False}:
794 raise nx.NetworkXError("Incorrect value for the parameter `reflexive`")
795
796 for v in G:
797 if reflexive is None:
798 TC.add_edges_from((v, u) for u in nx.descendants(G, v) if u not in TC[v])
799 elif reflexive is True:
800 TC.add_edges_from(
801 (v, u) for u in nx.descendants(G, v) | {v} if u not in TC[v]
802 )
803 elif reflexive is False:
804 TC.add_edges_from((v, e[1]) for e in nx.edge_bfs(G, v) if e[1] not in TC[v])
805
806 return TC
807
808
809@not_implemented_for("undirected")
810@nx._dispatchable(preserve_all_attrs=True, returns_graph=True)
811def transitive_closure_dag(G, topo_order=None):
812 """Returns the transitive closure of a directed acyclic graph.
813
814 This function is faster than the function `transitive_closure`, but fails
815 if the graph has a cycle.
816
817 The transitive closure of G = (V,E) is a graph G+ = (V,E+) such that
818 for all v, w in V there is an edge (v, w) in E+ if and only if there
819 is a non-null path from v to w in G.
820
821 Parameters
822 ----------
823 G : NetworkX DiGraph
824 A directed acyclic graph (DAG)
825
826 topo_order: list or tuple, optional
827 A topological order for G (if None, the function will compute one)
828
829 Returns
830 -------
831 NetworkX DiGraph
832 The transitive closure of `G`
833
834 Raises
835 ------
836 NetworkXNotImplemented
837 If `G` is not directed
838 NetworkXUnfeasible
839 If `G` has a cycle
840
841 Examples
842 --------
843 >>> DG = nx.DiGraph([(1, 2), (2, 3)])
844 >>> TC = nx.transitive_closure_dag(DG)
845 >>> TC.edges()
846 OutEdgeView([(1, 2), (1, 3), (2, 3)])
847
848 Notes
849 -----
850 This algorithm is probably simple enough to be well-known but I didn't find
851 a mention in the literature.
852 """
853 if topo_order is None:
854 topo_order = list(topological_sort(G))
855
856 TC = G.copy()
857
858 # idea: traverse vertices following a reverse topological order, connecting
859 # each vertex to its descendants at distance 2 as we go
860 for v in reversed(topo_order):
861 TC.add_edges_from((v, u) for u in nx.descendants_at_distance(TC, v, 2))
862
863 return TC
864
865
866@not_implemented_for("undirected")
867@nx._dispatchable(returns_graph=True)
868def transitive_reduction(G):
869 """Returns transitive reduction of a directed graph
870
871 The transitive reduction of G = (V,E) is a graph G- = (V,E-) such that
872 for all v,w in V there is an edge (v,w) in E- if and only if (v,w) is
873 in E and there is no path from v to w in G with length greater than 1.
874
875 Parameters
876 ----------
877 G : NetworkX DiGraph
878 A directed acyclic graph (DAG)
879
880 Returns
881 -------
882 NetworkX DiGraph
883 The transitive reduction of `G`
884
885 Raises
886 ------
887 NetworkXError
888 If `G` is not a directed acyclic graph (DAG) transitive reduction is
889 not uniquely defined and a :exc:`NetworkXError` exception is raised.
890
891 Examples
892 --------
893 To perform transitive reduction on a DiGraph:
894
895 >>> DG = nx.DiGraph([(1, 2), (2, 3), (1, 3)])
896 >>> TR = nx.transitive_reduction(DG)
897 >>> list(TR.edges)
898 [(1, 2), (2, 3)]
899
900 To avoid unnecessary data copies, this implementation does not return a
901 DiGraph with node/edge data.
902 To perform transitive reduction on a DiGraph and transfer node/edge data:
903
904 >>> DG = nx.DiGraph()
905 >>> DG.add_edges_from([(1, 2), (2, 3), (1, 3)], color="red")
906 >>> TR = nx.transitive_reduction(DG)
907 >>> TR.add_nodes_from(DG.nodes(data=True))
908 >>> TR.add_edges_from((u, v, DG.edges[u, v]) for u, v in TR.edges)
909 >>> list(TR.edges(data=True))
910 [(1, 2, {'color': 'red'}), (2, 3, {'color': 'red'})]
911
912 References
913 ----------
914 https://en.wikipedia.org/wiki/Transitive_reduction
915
916 """
917 if not is_directed_acyclic_graph(G):
918 msg = "Directed Acyclic Graph required for transitive_reduction"
919 raise nx.NetworkXError(msg)
920 TR = nx.DiGraph()
921 TR.add_nodes_from(G.nodes())
922 descendants = {}
923 # count before removing set stored in descendants
924 check_count = dict(G.in_degree)
925 for u in G:
926 u_nbrs = set(G[u])
927 for v in G[u]:
928 if v in u_nbrs:
929 if v not in descendants:
930 descendants[v] = {y for x, y in nx.dfs_edges(G, v)}
931 u_nbrs -= descendants[v]
932 check_count[v] -= 1
933 if check_count[v] == 0:
934 del descendants[v]
935 TR.add_edges_from((u, v) for v in u_nbrs)
936 return TR
937
938
939@not_implemented_for("undirected")
940@nx._dispatchable
941def antichains(G, topo_order=None):
942 """Generates antichains from a directed acyclic graph (DAG).
943
944 An antichain is a subset of a partially ordered set such that any
945 two elements in the subset are incomparable.
946
947 Parameters
948 ----------
949 G : NetworkX DiGraph
950 A directed acyclic graph (DAG)
951
952 topo_order: list or tuple, optional
953 A topological order for G (if None, the function will compute one)
954
955 Yields
956 ------
957 antichain : list
958 a list of nodes in `G` representing an antichain
959
960 Raises
961 ------
962 NetworkXNotImplemented
963 If `G` is not directed
964
965 NetworkXUnfeasible
966 If `G` contains a cycle
967
968 Examples
969 --------
970 >>> DG = nx.DiGraph([(1, 2), (1, 3)])
971 >>> list(nx.antichains(DG))
972 [[], [3], [2], [2, 3], [1]]
973
974 Notes
975 -----
976 This function was originally developed by Peter Jipsen and Franco Saliola
977 for the SAGE project. It's included in NetworkX with permission from the
978 authors. Original SAGE code at:
979
980 https://github.com/sagemath/sage/blob/master/src/sage/combinat/posets/hasse_diagram.py
981
982 References
983 ----------
984 .. [1] Free Lattices, by R. Freese, J. Jezek and J. B. Nation,
985 AMS, Vol 42, 1995, p. 226.
986 """
987 if topo_order is None:
988 topo_order = list(nx.topological_sort(G))
989
990 TC = nx.transitive_closure_dag(G, topo_order)
991 antichains_stacks = [([], list(reversed(topo_order)))]
992
993 while antichains_stacks:
994 (antichain, stack) = antichains_stacks.pop()
995 # Invariant:
996 # - the elements of antichain are independent
997 # - the elements of stack are independent from those of antichain
998 yield antichain
999 while stack:
1000 x = stack.pop()
1001 new_antichain = antichain + [x]
1002 new_stack = [t for t in stack if not ((t in TC[x]) or (x in TC[t]))]
1003 antichains_stacks.append((new_antichain, new_stack))
1004
1005
1006@not_implemented_for("undirected")
1007@nx._dispatchable(edge_attrs={"weight": "default_weight"})
1008def dag_longest_path(G, weight="weight", default_weight=1, topo_order=None):
1009 """Returns the longest path in a directed acyclic graph (DAG).
1010
1011 If `G` has edges with `weight` attribute the edge data are used as
1012 weight values.
1013
1014 Parameters
1015 ----------
1016 G : NetworkX DiGraph
1017 A directed acyclic graph (DAG)
1018
1019 weight : str, optional
1020 Edge data key to use for weight
1021
1022 default_weight : int, optional
1023 The weight of edges that do not have a weight attribute
1024
1025 topo_order: list or tuple, optional
1026 A topological order for `G` (if None, the function will compute one)
1027
1028 Returns
1029 -------
1030 list
1031 Longest path
1032
1033 Raises
1034 ------
1035 NetworkXNotImplemented
1036 If `G` is not directed
1037
1038 Examples
1039 --------
1040 >>> DG = nx.DiGraph(
1041 ... [(0, 1, {"cost": 1}), (1, 2, {"cost": 1}), (0, 2, {"cost": 42})]
1042 ... )
1043 >>> list(nx.all_simple_paths(DG, 0, 2))
1044 [[0, 1, 2], [0, 2]]
1045 >>> nx.dag_longest_path(DG)
1046 [0, 1, 2]
1047 >>> nx.dag_longest_path(DG, weight="cost")
1048 [0, 2]
1049
1050 In the case where multiple valid topological orderings exist, `topo_order`
1051 can be used to specify a specific ordering:
1052
1053 >>> DG = nx.DiGraph([(0, 1), (0, 2)])
1054 >>> sorted(nx.all_topological_sorts(DG)) # Valid topological orderings
1055 [[0, 1, 2], [0, 2, 1]]
1056 >>> nx.dag_longest_path(DG, topo_order=[0, 1, 2])
1057 [0, 1]
1058 >>> nx.dag_longest_path(DG, topo_order=[0, 2, 1])
1059 [0, 2]
1060
1061 See also
1062 --------
1063 dag_longest_path_length
1064
1065 """
1066 if not G:
1067 return []
1068
1069 if topo_order is None:
1070 topo_order = nx.topological_sort(G)
1071
1072 dist = {} # stores {v : (length, u)}
1073 for v in topo_order:
1074 us = [
1075 (
1076 dist[u][0]
1077 + (
1078 max(data.values(), key=lambda x: x.get(weight, default_weight))
1079 if G.is_multigraph()
1080 else data
1081 ).get(weight, default_weight),
1082 u,
1083 )
1084 for u, data in G.pred[v].items()
1085 ]
1086
1087 # Use the best predecessor if there is one and its distance is
1088 # non-negative, otherwise terminate.
1089 maxu = max(us, key=lambda x: x[0]) if us else (0, v)
1090 dist[v] = maxu if maxu[0] >= 0 else (0, v)
1091
1092 u = None
1093 v = max(dist, key=lambda x: dist[x][0])
1094 path = []
1095 while u != v:
1096 path.append(v)
1097 u = v
1098 v = dist[v][1]
1099
1100 path.reverse()
1101 return path
1102
1103
1104@not_implemented_for("undirected")
1105@nx._dispatchable(edge_attrs={"weight": "default_weight"})
1106def dag_longest_path_length(G, weight="weight", default_weight=1):
1107 """Returns the longest path length in a DAG
1108
1109 Parameters
1110 ----------
1111 G : NetworkX DiGraph
1112 A directed acyclic graph (DAG)
1113
1114 weight : string, optional
1115 Edge data key to use for weight
1116
1117 default_weight : int, optional
1118 The weight of edges that do not have a weight attribute
1119
1120 Returns
1121 -------
1122 int
1123 Longest path length
1124
1125 Raises
1126 ------
1127 NetworkXNotImplemented
1128 If `G` is not directed
1129
1130 Examples
1131 --------
1132 >>> DG = nx.DiGraph(
1133 ... [(0, 1, {"cost": 1}), (1, 2, {"cost": 1}), (0, 2, {"cost": 42})]
1134 ... )
1135 >>> list(nx.all_simple_paths(DG, 0, 2))
1136 [[0, 1, 2], [0, 2]]
1137 >>> nx.dag_longest_path_length(DG)
1138 2
1139 >>> nx.dag_longest_path_length(DG, weight="cost")
1140 42
1141
1142 See also
1143 --------
1144 dag_longest_path
1145 """
1146 path = nx.dag_longest_path(G, weight, default_weight)
1147 path_length = 0
1148 if G.is_multigraph():
1149 for u, v in pairwise(path):
1150 i = max(G[u][v], key=lambda x: G[u][v][x].get(weight, default_weight))
1151 path_length += G[u][v][i].get(weight, default_weight)
1152 else:
1153 for u, v in pairwise(path):
1154 path_length += G[u][v].get(weight, default_weight)
1155
1156 return path_length
1157
1158
1159@nx._dispatchable
1160def root_to_leaf_paths(G):
1161 """Yields root-to-leaf paths in a directed acyclic graph.
1162
1163 `G` must be a directed acyclic graph. If not, the behavior of this
1164 function is undefined. A "root" in this graph is a node of in-degree
1165 zero and a "leaf" a node of out-degree zero.
1166
1167 When invoked, this function iterates over each path from any root to
1168 any leaf. A path is a list of nodes.
1169
1170 """
1171 roots = (v for v, d in G.in_degree() if d == 0)
1172 leaves = (v for v, d in G.out_degree() if d == 0)
1173 all_paths = partial(nx.all_simple_paths, G)
1174 # TODO In Python 3, this would be better as `yield from ...`.
1175 return chaini(starmap(all_paths, product(roots, leaves)))
1176
1177
1178@not_implemented_for("multigraph")
1179@not_implemented_for("undirected")
1180@nx._dispatchable(returns_graph=True)
1181def dag_to_branching(G):
1182 """Returns a branching representing all (overlapping) paths from
1183 root nodes to leaf nodes in the given directed acyclic graph.
1184
1185 As described in :mod:`networkx.algorithms.tree.recognition`, a
1186 *branching* is a directed forest in which each node has at most one
1187 parent. In other words, a branching is a disjoint union of
1188 *arborescences*. For this function, each node of in-degree zero in
1189 `G` becomes a root of one of the arborescences, and there will be
1190 one leaf node for each distinct path from that root to a leaf node
1191 in `G`.
1192
1193 Each node `v` in `G` with *k* parents becomes *k* distinct nodes in
1194 the returned branching, one for each parent, and the sub-DAG rooted
1195 at `v` is duplicated for each copy. The algorithm then recurses on
1196 the children of each copy of `v`.
1197
1198 Parameters
1199 ----------
1200 G : NetworkX graph
1201 A directed acyclic graph.
1202
1203 Returns
1204 -------
1205 DiGraph
1206 The branching in which there is a bijection between root-to-leaf
1207 paths in `G` (in which multiple paths may share the same leaf)
1208 and root-to-leaf paths in the branching (in which there is a
1209 unique path from a root to a leaf).
1210
1211 Each node has an attribute 'source' whose value is the original
1212 node to which this node corresponds. No other graph, node, or
1213 edge attributes are copied into this new graph.
1214
1215 Raises
1216 ------
1217 NetworkXNotImplemented
1218 If `G` is not directed, or if `G` is a multigraph.
1219
1220 HasACycle
1221 If `G` is not acyclic.
1222
1223 Examples
1224 --------
1225 To examine which nodes in the returned branching were produced by
1226 which original node in the directed acyclic graph, we can collect
1227 the mapping from source node to new nodes into a dictionary. For
1228 example, consider the directed diamond graph::
1229
1230 >>> from collections import defaultdict
1231 >>> from operator import itemgetter
1232 >>>
1233 >>> G = nx.DiGraph(nx.utils.pairwise("abd"))
1234 >>> G.add_edges_from(nx.utils.pairwise("acd"))
1235 >>> B = nx.dag_to_branching(G)
1236 >>>
1237 >>> sources = defaultdict(set)
1238 >>> for v, source in B.nodes(data="source"):
1239 ... sources[source].add(v)
1240 >>> len(sources["a"])
1241 1
1242 >>> len(sources["d"])
1243 2
1244
1245 To copy node attributes from the original graph to the new graph,
1246 you can use a dictionary like the one constructed in the above
1247 example::
1248
1249 >>> for source, nodes in sources.items():
1250 ... for v in nodes:
1251 ... B.nodes[v].update(G.nodes[source])
1252
1253 Notes
1254 -----
1255 This function is not idempotent in the sense that the node labels in
1256 the returned branching may be uniquely generated each time the
1257 function is invoked. In fact, the node labels may not be integers;
1258 in order to relabel the nodes to be more readable, you can use the
1259 :func:`networkx.convert_node_labels_to_integers` function.
1260
1261 The current implementation of this function uses
1262 :func:`networkx.prefix_tree`, so it is subject to the limitations of
1263 that function.
1264
1265 """
1266 if has_cycle(G):
1267 msg = "dag_to_branching is only defined for acyclic graphs"
1268 raise nx.HasACycle(msg)
1269 paths = root_to_leaf_paths(G)
1270 B = nx.prefix_tree(paths)
1271 # Remove the synthetic `root`(0) and `NIL`(-1) nodes from the tree
1272 B.remove_node(0)
1273 B.remove_node(-1)
1274 return B
1275
1276
1277@not_implemented_for("undirected")
1278@nx._dispatchable
1279def v_structures(G):
1280 """Yields 3-node tuples that represent the v-structures in `G`.
1281
1282 Colliders are triples in the directed acyclic graph (DAG) where two parent nodes
1283 point to the same child node. V-structures are colliders where the two parent
1284 nodes are not adjacent. In a causal graph setting, the parents do not directly
1285 depend on each other, but conditioning on the child node provides an association.
1286
1287 Parameters
1288 ----------
1289 G : graph
1290 A networkx `~networkx.DiGraph`.
1291
1292 Yields
1293 ------
1294 A 3-tuple representation of a v-structure
1295 Each v-structure is a 3-tuple with the parent, collider, and other parent.
1296
1297 Raises
1298 ------
1299 NetworkXNotImplemented
1300 If `G` is an undirected graph.
1301
1302 Examples
1303 --------
1304 >>> G = nx.DiGraph([(1, 2), (0, 4), (3, 1), (2, 4), (0, 5), (4, 5), (1, 5)])
1305 >>> nx.is_directed_acyclic_graph(G)
1306 True
1307 >>> list(nx.dag.v_structures(G))
1308 [(0, 4, 2), (0, 5, 1), (4, 5, 1)]
1309
1310 See Also
1311 --------
1312 colliders
1313
1314 Notes
1315 -----
1316 This function was written to be used on DAGs, however it works on cyclic graphs
1317 too. Since colliders are referred to in the cyclic causal graph literature
1318 [2]_ we allow cyclic graphs in this function. It is suggested that you test if
1319 your input graph is acyclic as in the example if you want that property.
1320
1321 References
1322 ----------
1323 .. [1] `Pearl's PRIMER <https://bayes.cs.ucla.edu/PRIMER/primer-ch2.pdf>`_
1324 Ch-2 page 50: v-structures def.
1325 .. [2] A Hyttinen, P.O. Hoyer, F. Eberhardt, M J ̈arvisalo, (2013)
1326 "Discovering cyclic causal models with latent variables:
1327 a general SAT-based procedure", UAI'13: Proceedings of the Twenty-Ninth
1328 Conference on Uncertainty in Artificial Intelligence, pg 301–310,
1329 `doi:10.5555/3023638.3023669 <https://dl.acm.org/doi/10.5555/3023638.3023669>`_
1330 """
1331 for p1, c, p2 in colliders(G):
1332 if not (G.has_edge(p1, p2) or G.has_edge(p2, p1)):
1333 yield (p1, c, p2)
1334
1335
1336@not_implemented_for("undirected")
1337@nx._dispatchable
1338def colliders(G):
1339 """Yields 3-node tuples that represent the colliders in `G`.
1340
1341 In a Directed Acyclic Graph (DAG), if you have three nodes A, B, and C, and
1342 there are edges from A to C and from B to C, then C is a collider [1]_ . In
1343 a causal graph setting, this means that both events A and B are "causing" C,
1344 and conditioning on C provide an association between A and B even if
1345 no direct causal relationship exists between A and B.
1346
1347 Parameters
1348 ----------
1349 G : graph
1350 A networkx `~networkx.DiGraph`.
1351
1352 Yields
1353 ------
1354 A 3-tuple representation of a collider
1355 Each collider is a 3-tuple with the parent, collider, and other parent.
1356
1357 Raises
1358 ------
1359 NetworkXNotImplemented
1360 If `G` is an undirected graph.
1361
1362 Examples
1363 --------
1364 >>> G = nx.DiGraph([(1, 2), (0, 4), (3, 1), (2, 4), (0, 5), (4, 5), (1, 5)])
1365 >>> nx.is_directed_acyclic_graph(G)
1366 True
1367 >>> list(nx.dag.colliders(G))
1368 [(0, 4, 2), (0, 5, 4), (0, 5, 1), (4, 5, 1)]
1369
1370 See Also
1371 --------
1372 v_structures
1373
1374 Notes
1375 -----
1376 This function was written to be used on DAGs, however it works on cyclic graphs
1377 too. Since colliders are referred to in the cyclic causal graph literature
1378 [2]_ we allow cyclic graphs in this function. It is suggested that you test if
1379 your input graph is acyclic as in the example if you want that property.
1380
1381 References
1382 ----------
1383 .. [1] `Wikipedia: Collider in causal graphs <https://en.wikipedia.org/wiki/Collider_(statistics)>`_
1384 .. [2] A Hyttinen, P.O. Hoyer, F. Eberhardt, M J ̈arvisalo, (2013)
1385 "Discovering cyclic causal models with latent variables:
1386 a general SAT-based procedure", UAI'13: Proceedings of the Twenty-Ninth
1387 Conference on Uncertainty in Artificial Intelligence, pg 301–310,
1388 `doi:10.5555/3023638.3023669 <https://dl.acm.org/doi/10.5555/3023638.3023669>`_
1389 """
1390 for node in G.nodes:
1391 for p1, p2 in combinations(G.predecessors(node), 2):
1392 yield (p1, node, p2)