1"""Algorithms for directed acyclic graphs (DAGs).
2
3Note that most of these functions are only guaranteed to work for DAGs.
4In general, these functions do not check for acyclic-ness, so it is up
5to the user to check for that.
6"""
7
8import heapq
9from collections import deque
10from itertools import combinations, product
11from math import gcd
12
13import networkx as nx
14from networkx.utils import arbitrary_element, not_implemented_for, pairwise
15
16__all__ = [
17 "descendants",
18 "ancestors",
19 "topological_sort",
20 "lexicographical_topological_sort",
21 "all_topological_sorts",
22 "topological_generations",
23 "is_directed_acyclic_graph",
24 "is_aperiodic",
25 "transitive_closure",
26 "transitive_closure_dag",
27 "transitive_reduction",
28 "antichains",
29 "dag_longest_path",
30 "dag_longest_path_length",
31 "dag_to_branching",
32]
33
34
35@nx._dispatchable
36def descendants(G, source):
37 """Returns all nodes reachable from `source` in `G`.
38
39 Parameters
40 ----------
41 G : NetworkX Graph
42 source : node in `G`
43
44 Returns
45 -------
46 set()
47 The descendants of `source` in `G`
48
49 Raises
50 ------
51 NetworkXError
52 If node `source` is not in `G`.
53
54 Examples
55 --------
56 >>> DG = nx.path_graph(5, create_using=nx.DiGraph)
57 >>> sorted(nx.descendants(DG, 2))
58 [3, 4]
59
60 The `source` node is not a descendant of itself, but can be included manually:
61
62 >>> sorted(nx.descendants(DG, 2) | {2})
63 [2, 3, 4]
64
65 See also
66 --------
67 ancestors
68 """
69 return {child for parent, child in nx.bfs_edges(G, source)}
70
71
72@nx._dispatchable
73def ancestors(G, source):
74 """Returns all nodes having a path to `source` in `G`.
75
76 Parameters
77 ----------
78 G : NetworkX Graph
79 source : node in `G`
80
81 Returns
82 -------
83 set()
84 The ancestors of `source` in `G`
85
86 Raises
87 ------
88 NetworkXError
89 If node `source` is not in `G`.
90
91 Examples
92 --------
93 >>> DG = nx.path_graph(5, create_using=nx.DiGraph)
94 >>> sorted(nx.ancestors(DG, 2))
95 [0, 1]
96
97 The `source` node is not an ancestor of itself, but can be included manually:
98
99 >>> sorted(nx.ancestors(DG, 2) | {2})
100 [0, 1, 2]
101
102 See also
103 --------
104 descendants
105 """
106 return {child for parent, child in nx.bfs_edges(G, source, reverse=True)}
107
108
109@nx._dispatchable
110def has_cycle(G):
111 """Decides whether the directed graph has a cycle."""
112 try:
113 # Feed the entire iterator into a zero-length deque.
114 deque(topological_sort(G), maxlen=0)
115 except nx.NetworkXUnfeasible:
116 return True
117 else:
118 return False
119
120
121@nx._dispatchable
122def is_directed_acyclic_graph(G):
123 """Returns True if the graph `G` is a directed acyclic graph (DAG) or
124 False if not.
125
126 Parameters
127 ----------
128 G : NetworkX graph
129
130 Returns
131 -------
132 bool
133 True if `G` is a DAG, False otherwise
134
135 Examples
136 --------
137 Undirected graph::
138
139 >>> G = nx.Graph([(1, 2), (2, 3)])
140 >>> nx.is_directed_acyclic_graph(G)
141 False
142
143 Directed graph with cycle::
144
145 >>> G = nx.DiGraph([(1, 2), (2, 3), (3, 1)])
146 >>> nx.is_directed_acyclic_graph(G)
147 False
148
149 Directed acyclic graph::
150
151 >>> G = nx.DiGraph([(1, 2), (2, 3)])
152 >>> nx.is_directed_acyclic_graph(G)
153 True
154
155 See also
156 --------
157 topological_sort
158 """
159 return G.is_directed() and not has_cycle(G)
160
161
162@nx._dispatchable
163def topological_generations(G):
164 """Stratifies a DAG into generations.
165
166 A topological generation is node collection in which ancestors of a node in each
167 generation are guaranteed to be in a previous generation, and any descendants of
168 a node are guaranteed to be in a following generation. Nodes are guaranteed to
169 be in the earliest possible generation that they can belong to.
170
171 Parameters
172 ----------
173 G : NetworkX digraph
174 A directed acyclic graph (DAG)
175
176 Yields
177 ------
178 sets of nodes
179 Yields sets of nodes representing each generation.
180
181 Raises
182 ------
183 NetworkXError
184 Generations are defined for directed graphs only. If the graph
185 `G` is undirected, a :exc:`NetworkXError` is raised.
186
187 NetworkXUnfeasible
188 If `G` is not a directed acyclic graph (DAG) no topological generations
189 exist and a :exc:`NetworkXUnfeasible` exception is raised. This can also
190 be raised if `G` is changed while the returned iterator is being processed
191
192 RuntimeError
193 If `G` is changed while the returned iterator is being processed.
194
195 Examples
196 --------
197 >>> DG = nx.DiGraph([(2, 1), (3, 1)])
198 >>> [sorted(generation) for generation in nx.topological_generations(DG)]
199 [[2, 3], [1]]
200
201 Notes
202 -----
203 The generation in which a node resides can also be determined by taking the
204 max-path-distance from the node to the farthest leaf node. That value can
205 be obtained with this function using `enumerate(topological_generations(G))`.
206
207 See also
208 --------
209 topological_sort
210 """
211 if not G.is_directed():
212 raise nx.NetworkXError("Topological sort not defined on undirected graphs.")
213
214 multigraph = G.is_multigraph()
215 indegree_map = {v: d for v, d in G.in_degree() if d > 0}
216 zero_indegree = [v for v, d in G.in_degree() if d == 0]
217
218 while zero_indegree:
219 this_generation = zero_indegree
220 zero_indegree = []
221 for node in this_generation:
222 if node not in G:
223 raise RuntimeError("Graph changed during iteration")
224 for child in G.neighbors(node):
225 try:
226 indegree_map[child] -= len(G[node][child]) if multigraph else 1
227 except KeyError as err:
228 raise RuntimeError("Graph changed during iteration") from err
229 if indegree_map[child] == 0:
230 zero_indegree.append(child)
231 del indegree_map[child]
232 yield this_generation
233
234 if indegree_map:
235 raise nx.NetworkXUnfeasible(
236 "Graph contains a cycle or graph changed during iteration"
237 )
238
239
240@nx._dispatchable
241def topological_sort(G):
242 """Returns a generator of nodes in topologically sorted order.
243
244 A topological sort is a nonunique permutation of the nodes of a
245 directed graph such that an edge from u to v implies that u
246 appears before v in the topological sort order. This ordering is
247 valid only if the graph has no directed cycles.
248
249 Parameters
250 ----------
251 G : NetworkX digraph
252 A directed acyclic graph (DAG)
253
254 Yields
255 ------
256 nodes
257 Yields the nodes in topological sorted order.
258
259 Raises
260 ------
261 NetworkXError
262 Topological sort is defined for directed graphs only. If the graph `G`
263 is undirected, a :exc:`NetworkXError` is raised.
264
265 NetworkXUnfeasible
266 If `G` is not a directed acyclic graph (DAG) no topological sort exists
267 and a :exc:`NetworkXUnfeasible` exception is raised. This can also be
268 raised if `G` is changed while the returned iterator is being processed
269
270 RuntimeError
271 If `G` is changed while the returned iterator is being processed.
272
273 Examples
274 --------
275 To get the reverse order of the topological sort:
276
277 >>> DG = nx.DiGraph([(1, 2), (2, 3)])
278 >>> list(reversed(list(nx.topological_sort(DG))))
279 [3, 2, 1]
280
281 If your DiGraph naturally has the edges representing tasks/inputs
282 and nodes representing people/processes that initiate tasks, then
283 topological_sort is not quite what you need. You will have to change
284 the tasks to nodes with dependence reflected by edges. The result is
285 a kind of topological sort of the edges. This can be done
286 with :func:`networkx.line_graph` as follows:
287
288 >>> list(nx.topological_sort(nx.line_graph(DG)))
289 [(1, 2), (2, 3)]
290
291 Notes
292 -----
293 This algorithm is based on a description and proof in
294 "Introduction to Algorithms: A Creative Approach" [1]_ .
295
296 See also
297 --------
298 is_directed_acyclic_graph, lexicographical_topological_sort
299
300 References
301 ----------
302 .. [1] Manber, U. (1989).
303 *Introduction to Algorithms - A Creative Approach.* Addison-Wesley.
304 """
305 for generation in nx.topological_generations(G):
306 yield from generation
307
308
309@nx._dispatchable
310def lexicographical_topological_sort(G, key=None):
311 """Generate the nodes in the unique lexicographical topological sort order.
312
313 Generates a unique ordering of nodes by first sorting topologically (for which there are often
314 multiple valid orderings) and then additionally by sorting lexicographically.
315
316 A topological sort arranges the nodes of a directed graph so that the
317 upstream node of each directed edge precedes the downstream node.
318 It is always possible to find a solution for directed graphs that have no cycles.
319 There may be more than one valid solution.
320
321 Lexicographical sorting is just sorting alphabetically. It is used here to break ties in the
322 topological sort and to determine a single, unique ordering. This can be useful in comparing
323 sort results.
324
325 The lexicographical order can be customized by providing a function to the `key=` parameter.
326 The definition of the key function is the same as used in python's built-in `sort()`.
327 The function takes a single argument and returns a key to use for sorting purposes.
328
329 Lexicographical sorting can fail if the node names are un-sortable. See the example below.
330 The solution is to provide a function to the `key=` argument that returns sortable keys.
331
332
333 Parameters
334 ----------
335 G : NetworkX digraph
336 A directed acyclic graph (DAG)
337
338 key : function, optional
339 A function of one argument that converts a node name to a comparison key.
340 It defines and resolves ambiguities in the sort order. Defaults to the identity function.
341
342 Yields
343 ------
344 nodes
345 Yields the nodes of G in lexicographical topological sort order.
346
347 Raises
348 ------
349 NetworkXError
350 Topological sort is defined for directed graphs only. If the graph `G`
351 is undirected, a :exc:`NetworkXError` is raised.
352
353 NetworkXUnfeasible
354 If `G` is not a directed acyclic graph (DAG) no topological sort exists
355 and a :exc:`NetworkXUnfeasible` exception is raised. This can also be
356 raised if `G` is changed while the returned iterator is being processed
357
358 RuntimeError
359 If `G` is changed while the returned iterator is being processed.
360
361 TypeError
362 Results from un-sortable node names.
363 Consider using `key=` parameter to resolve ambiguities in the sort order.
364
365 Examples
366 --------
367 >>> DG = nx.DiGraph([(2, 1), (2, 5), (1, 3), (1, 4), (5, 4)])
368 >>> list(nx.lexicographical_topological_sort(DG))
369 [2, 1, 3, 5, 4]
370 >>> list(nx.lexicographical_topological_sort(DG, key=lambda x: -x))
371 [2, 5, 1, 4, 3]
372
373 The sort will fail for any graph with integer and string nodes. Comparison of integer to strings
374 is not defined in python. Is 3 greater or less than 'red'?
375
376 >>> DG = nx.DiGraph([(1, "red"), (3, "red"), (1, "green"), (2, "blue")])
377 >>> list(nx.lexicographical_topological_sort(DG))
378 Traceback (most recent call last):
379 ...
380 TypeError: '<' not supported between instances of 'str' and 'int'
381 ...
382
383 Incomparable nodes can be resolved using a `key` function. This example function
384 allows comparison of integers and strings by returning a tuple where the first
385 element is True for `str`, False otherwise. The second element is the node name.
386 This groups the strings and integers separately so they can be compared only among themselves.
387
388 >>> key = lambda node: (isinstance(node, str), node)
389 >>> list(nx.lexicographical_topological_sort(DG, key=key))
390 [1, 2, 3, 'blue', 'green', 'red']
391
392 Notes
393 -----
394 This algorithm is based on a description and proof in
395 "Introduction to Algorithms: A Creative Approach" [1]_ .
396
397 See also
398 --------
399 topological_sort
400
401 References
402 ----------
403 .. [1] Manber, U. (1989).
404 *Introduction to Algorithms - A Creative Approach.* Addison-Wesley.
405 """
406 if not G.is_directed():
407 msg = "Topological sort not defined on undirected graphs."
408 raise nx.NetworkXError(msg)
409
410 if key is None:
411
412 def key(node):
413 return node
414
415 nodeid_map = {n: i for i, n in enumerate(G)}
416
417 def create_tuple(node):
418 return key(node), nodeid_map[node], node
419
420 indegree_map = {v: d for v, d in G.in_degree() if d > 0}
421 # These nodes have zero indegree and ready to be returned.
422 zero_indegree = [create_tuple(v) for v, d in G.in_degree() if d == 0]
423 heapq.heapify(zero_indegree)
424
425 while zero_indegree:
426 _, _, node = heapq.heappop(zero_indegree)
427
428 if node not in G:
429 raise RuntimeError("Graph changed during iteration")
430 for _, child in G.edges(node):
431 try:
432 indegree_map[child] -= 1
433 except KeyError as err:
434 raise RuntimeError("Graph changed during iteration") from err
435 if indegree_map[child] == 0:
436 try:
437 heapq.heappush(zero_indegree, create_tuple(child))
438 except TypeError as err:
439 raise TypeError(
440 f"{err}\nConsider using `key=` parameter to resolve ambiguities in the sort order."
441 )
442 del indegree_map[child]
443
444 yield node
445
446 if indegree_map:
447 msg = "Graph contains a cycle or graph changed during iteration"
448 raise nx.NetworkXUnfeasible(msg)
449
450
451@not_implemented_for("undirected")
452@nx._dispatchable
453def all_topological_sorts(G):
454 """Returns a generator of _all_ topological sorts of the directed graph G.
455
456 A topological sort is a nonunique permutation of the nodes such that an
457 edge from u to v implies that u appears before v in the topological sort
458 order.
459
460 Parameters
461 ----------
462 G : NetworkX DiGraph
463 A directed graph
464
465 Yields
466 ------
467 topological_sort_order : list
468 a list of nodes in `G`, representing one of the topological sort orders
469
470 Raises
471 ------
472 NetworkXNotImplemented
473 If `G` is not directed
474 NetworkXUnfeasible
475 If `G` is not acyclic
476
477 Examples
478 --------
479 To enumerate all topological sorts of directed graph:
480
481 >>> DG = nx.DiGraph([(1, 2), (2, 3), (2, 4)])
482 >>> list(nx.all_topological_sorts(DG))
483 [[1, 2, 4, 3], [1, 2, 3, 4]]
484
485 Notes
486 -----
487 Implements an iterative version of the algorithm given in [1].
488
489 References
490 ----------
491 .. [1] Knuth, Donald E., Szwarcfiter, Jayme L. (1974).
492 "A Structured Program to Generate All Topological Sorting Arrangements"
493 Information Processing Letters, Volume 2, Issue 6, 1974, Pages 153-157,
494 ISSN 0020-0190,
495 https://doi.org/10.1016/0020-0190(74)90001-5.
496 Elsevier (North-Holland), Amsterdam
497 """
498 if not G.is_directed():
499 raise nx.NetworkXError("Topological sort not defined on undirected graphs.")
500
501 # the names of count and D are chosen to match the global variables in [1]
502 # number of edges originating in a vertex v
503 count = dict(G.in_degree())
504 # vertices with indegree 0
505 D = deque([v for v, d in G.in_degree() if d == 0])
506 # stack of first value chosen at a position k in the topological sort
507 bases = []
508 current_sort = []
509
510 # do-while construct
511 while True:
512 assert all(count[v] == 0 for v in D)
513
514 if len(current_sort) == len(G):
515 yield list(current_sort)
516
517 # clean-up stack
518 while len(current_sort) > 0:
519 assert len(bases) == len(current_sort)
520 q = current_sort.pop()
521
522 # "restores" all edges (q, x)
523 # NOTE: it is important to iterate over edges instead
524 # of successors, so count is updated correctly in multigraphs
525 for _, j in G.out_edges(q):
526 count[j] += 1
527 assert count[j] >= 0
528 # remove entries from D
529 while len(D) > 0 and count[D[-1]] > 0:
530 D.pop()
531
532 # corresponds to a circular shift of the values in D
533 # if the first value chosen (the base) is in the first
534 # position of D again, we are done and need to consider the
535 # previous condition
536 D.appendleft(q)
537 if D[-1] == bases[-1]:
538 # all possible values have been chosen at current position
539 # remove corresponding marker
540 bases.pop()
541 else:
542 # there are still elements that have not been fixed
543 # at the current position in the topological sort
544 # stop removing elements, escape inner loop
545 break
546
547 else:
548 if len(D) == 0:
549 raise nx.NetworkXUnfeasible("Graph contains a cycle.")
550
551 # choose next node
552 q = D.pop()
553 # "erase" all edges (q, x)
554 # NOTE: it is important to iterate over edges instead
555 # of successors, so count is updated correctly in multigraphs
556 for _, j in G.out_edges(q):
557 count[j] -= 1
558 assert count[j] >= 0
559 if count[j] == 0:
560 D.append(j)
561 current_sort.append(q)
562
563 # base for current position might _not_ be fixed yet
564 if len(bases) < len(current_sort):
565 bases.append(q)
566
567 if len(bases) == 0:
568 break
569
570
571@nx._dispatchable
572def is_aperiodic(G):
573 """Returns True if `G` is aperiodic.
574
575 A strongly connected directed graph is aperiodic if there is no integer ``k > 1``
576 that divides the length of every cycle in the graph.
577
578 This function requires the graph `G` to be strongly connected and will raise
579 an error if it's not. For graphs that are not strongly connected, you should
580 first identify their strongly connected components
581 (using :func:`~networkx.algorithms.components.strongly_connected_components`)
582 or attracting components
583 (using :func:`~networkx.algorithms.components.attracting_components`),
584 and then apply this function to those individual components.
585
586 Parameters
587 ----------
588 G : NetworkX DiGraph
589 A directed graph
590
591 Returns
592 -------
593 bool
594 True if the graph is aperiodic False otherwise
595
596 Raises
597 ------
598 NetworkXError
599 If `G` is not directed
600 NetworkXError
601 If `G` is not strongly connected
602 NetworkXPointlessConcept
603 If `G` has no nodes
604
605 Examples
606 --------
607 A graph consisting of one cycle, the length of which is 2. Therefore ``k = 2``
608 divides the length of every cycle in the graph and thus the graph
609 is *not aperiodic*::
610
611 >>> DG = nx.DiGraph([(1, 2), (2, 1)])
612 >>> nx.is_aperiodic(DG)
613 False
614
615 A graph consisting of two cycles: one of length 2 and the other of length 3.
616 The cycle lengths are coprime, so there is no single value of k where ``k > 1``
617 that divides each cycle length and therefore the graph is *aperiodic*::
618
619 >>> DG = nx.DiGraph([(1, 2), (2, 3), (3, 1), (1, 4), (4, 1)])
620 >>> nx.is_aperiodic(DG)
621 True
622
623 A graph created from cycles of the same length can still be aperiodic since
624 the cycles can overlap and form new cycles of different lengths. For example,
625 the following graph contains a cycle ``[4, 2, 3, 1]`` of length 4, which is coprime
626 with the explicitly added cycles of length 3, so the graph is aperiodic::
627
628 >>> DG = nx.DiGraph()
629 >>> nx.add_cycle(DG, [1, 2, 3])
630 >>> nx.add_cycle(DG, [2, 1, 4])
631 >>> nx.is_aperiodic(DG)
632 True
633
634 A single-node graph's aperiodicity depends on whether it has a self-loop:
635 it is aperiodic if a self-loop exists, and periodic otherwise::
636
637 >>> G = nx.DiGraph()
638 >>> G.add_node(1)
639 >>> nx.is_aperiodic(G)
640 False
641 >>> G.add_edge(1, 1)
642 >>> nx.is_aperiodic(G)
643 True
644
645 A Markov chain can be modeled as a directed graph, with nodes representing
646 states and edges representing transitions with non-zero probability.
647 Aperiodicity is typically considered for irreducible Markov chains,
648 which are those that are *strongly connected* as graphs.
649
650 The following Markov chain is irreducible and aperiodic, and thus
651 ergodic. It is guaranteed to have a unique stationary distribution::
652
653 >>> G = nx.DiGraph()
654 >>> nx.add_cycle(G, [1, 2, 3, 4])
655 >>> G.add_edge(1, 3)
656 >>> nx.is_aperiodic(G)
657 True
658
659 Reducible Markov chains can sometimes have a unique stationary distribution.
660 This occurs if the chain has exactly one closed communicating class and
661 that class itself is aperiodic (see [1]_). You can use
662 :func:`~networkx.algorithms.components.attracting_components`
663 to find these closed communicating classes::
664
665 >>> G = nx.DiGraph([(1, 3), (2, 3)])
666 >>> nx.add_cycle(G, [3, 4, 5, 6])
667 >>> nx.add_cycle(G, [3, 5, 6])
668 >>> communicating_classes = list(nx.strongly_connected_components(G))
669 >>> len(communicating_classes)
670 3
671 >>> closed_communicating_classes = list(nx.attracting_components(G))
672 >>> len(closed_communicating_classes)
673 1
674 >>> nx.is_aperiodic(G.subgraph(closed_communicating_classes[0]))
675 True
676
677 Notes
678 -----
679 This uses the method outlined in [1]_, which runs in $O(m)$ time
680 given $m$ edges in `G`.
681
682 References
683 ----------
684 .. [1] Jarvis, J. P.; Shier, D. R. (1996),
685 "Graph-theoretic analysis of finite Markov chains,"
686 in Shier, D. R.; Wallenius, K. T., Applied Mathematical Modeling:
687 A Multidisciplinary Approach, CRC Press.
688 """
689 if not G.is_directed():
690 raise nx.NetworkXError("is_aperiodic not defined for undirected graphs")
691 if len(G) == 0:
692 raise nx.NetworkXPointlessConcept("Graph has no nodes.")
693 if not nx.is_strongly_connected(G):
694 raise nx.NetworkXError("Graph is not strongly connected.")
695 s = arbitrary_element(G)
696 levels = {s: 0}
697 this_level = [s]
698 g = 0
699 lev = 1
700 while this_level:
701 next_level = []
702 for u in this_level:
703 for v in G[u]:
704 if v in levels: # Non-Tree Edge
705 g = gcd(g, levels[u] - levels[v] + 1)
706 else: # Tree Edge
707 next_level.append(v)
708 levels[v] = lev
709 this_level = next_level
710 lev += 1
711 return g == 1
712
713
714@nx._dispatchable(preserve_all_attrs=True, returns_graph=True)
715def transitive_closure(G, reflexive=False):
716 """Returns transitive closure of a graph
717
718 The transitive closure of G = (V,E) is a graph G+ = (V,E+) such that
719 for all v, w in V there is an edge (v, w) in E+ if and only if there
720 is a path from v to w in G.
721
722 Handling of paths from v to v has some flexibility within this definition.
723 A reflexive transitive closure creates a self-loop for the path
724 from v to v of length 0. The usual transitive closure creates a
725 self-loop only if a cycle exists (a path from v to v with length > 0).
726 We also allow an option for no self-loops.
727
728 Parameters
729 ----------
730 G : NetworkX Graph
731 A directed/undirected graph/multigraph.
732 reflexive : Bool or None, optional (default: False)
733 Determines when cycles create self-loops in the Transitive Closure.
734 If True, trivial cycles (length 0) create self-loops. The result
735 is a reflexive transitive closure of G.
736 If False (the default) non-trivial cycles create self-loops.
737 If None, self-loops are not created.
738
739 Returns
740 -------
741 NetworkX graph
742 The transitive closure of `G`
743
744 Raises
745 ------
746 NetworkXError
747 If `reflexive` not in `{None, True, False}`
748
749 Examples
750 --------
751 The treatment of trivial (i.e. length 0) cycles is controlled by the
752 `reflexive` parameter.
753
754 Trivial (i.e. length 0) cycles do not create self-loops when
755 ``reflexive=False`` (the default)::
756
757 >>> DG = nx.DiGraph([(1, 2), (2, 3)])
758 >>> TC = nx.transitive_closure(DG, reflexive=False)
759 >>> TC.edges()
760 OutEdgeView([(1, 2), (1, 3), (2, 3)])
761
762 However, nontrivial (i.e. length greater than 0) cycles create self-loops
763 when ``reflexive=False`` (the default)::
764
765 >>> DG = nx.DiGraph([(1, 2), (2, 3), (3, 1)])
766 >>> TC = nx.transitive_closure(DG, reflexive=False)
767 >>> TC.edges()
768 OutEdgeView([(1, 2), (1, 3), (1, 1), (2, 3), (2, 1), (2, 2), (3, 1), (3, 2), (3, 3)])
769
770 Trivial cycles (length 0) create self-loops when ``reflexive=True``::
771
772 >>> DG = nx.DiGraph([(1, 2), (2, 3)])
773 >>> TC = nx.transitive_closure(DG, reflexive=True)
774 >>> TC.edges()
775 OutEdgeView([(1, 2), (1, 1), (1, 3), (2, 3), (2, 2), (3, 3)])
776
777 And the third option is not to create self-loops at all when ``reflexive=None``::
778
779 >>> DG = nx.DiGraph([(1, 2), (2, 3), (3, 1)])
780 >>> TC = nx.transitive_closure(DG, reflexive=None)
781 >>> TC.edges()
782 OutEdgeView([(1, 2), (1, 3), (2, 3), (2, 1), (3, 1), (3, 2)])
783
784 References
785 ----------
786 .. [1] https://www.ics.uci.edu/~eppstein/PADS/PartialOrder.py
787 """
788 TC = G.copy()
789
790 if reflexive not in {None, True, False}:
791 raise nx.NetworkXError("Incorrect value for the parameter `reflexive`")
792
793 for v in G:
794 if reflexive is None:
795 TC.add_edges_from((v, u) for u in nx.descendants(G, v) if u not in TC[v])
796 elif reflexive is True:
797 TC.add_edges_from(
798 (v, u) for u in nx.descendants(G, v) | {v} if u not in TC[v]
799 )
800 elif reflexive is False:
801 TC.add_edges_from((v, e[1]) for e in nx.edge_bfs(G, v) if e[1] not in TC[v])
802
803 return TC
804
805
806@not_implemented_for("undirected")
807@nx._dispatchable(preserve_all_attrs=True, returns_graph=True)
808def transitive_closure_dag(G, topo_order=None):
809 """Returns the transitive closure of a directed acyclic graph.
810
811 This function is faster than the function `transitive_closure`, but fails
812 if the graph has a cycle.
813
814 The transitive closure of G = (V,E) is a graph G+ = (V,E+) such that
815 for all v, w in V there is an edge (v, w) in E+ if and only if there
816 is a non-null path from v to w in G.
817
818 Parameters
819 ----------
820 G : NetworkX DiGraph
821 A directed acyclic graph (DAG)
822
823 topo_order: list or tuple, optional
824 A topological order for G (if None, the function will compute one)
825
826 Returns
827 -------
828 NetworkX DiGraph
829 The transitive closure of `G`
830
831 Raises
832 ------
833 NetworkXNotImplemented
834 If `G` is not directed
835 NetworkXUnfeasible
836 If `G` has a cycle
837
838 Examples
839 --------
840 >>> DG = nx.DiGraph([(1, 2), (2, 3)])
841 >>> TC = nx.transitive_closure_dag(DG)
842 >>> TC.edges()
843 OutEdgeView([(1, 2), (1, 3), (2, 3)])
844
845 Notes
846 -----
847 This algorithm is probably simple enough to be well-known but I didn't find
848 a mention in the literature.
849 """
850 if topo_order is None:
851 topo_order = list(topological_sort(G))
852
853 TC = G.copy()
854
855 # idea: traverse vertices following a reverse topological order, connecting
856 # each vertex to its descendants at distance 2 as we go
857 for v in reversed(topo_order):
858 TC.add_edges_from((v, u) for u in nx.descendants_at_distance(TC, v, 2))
859
860 return TC
861
862
863@not_implemented_for("undirected")
864@nx._dispatchable(returns_graph=True)
865def transitive_reduction(G):
866 """Returns transitive reduction of a directed graph
867
868 The transitive reduction of G = (V,E) is a graph G- = (V,E-) such that
869 for all v,w in V there is an edge (v,w) in E- if and only if (v,w) is
870 in E and there is no path from v to w in G with length greater than 1.
871
872 Parameters
873 ----------
874 G : NetworkX DiGraph
875 A directed acyclic graph (DAG)
876
877 Returns
878 -------
879 NetworkX DiGraph
880 The transitive reduction of `G`
881
882 Raises
883 ------
884 NetworkXError
885 If `G` is not a directed acyclic graph (DAG) transitive reduction is
886 not uniquely defined and a :exc:`NetworkXError` exception is raised.
887
888 Examples
889 --------
890 To perform transitive reduction on a DiGraph:
891
892 >>> DG = nx.DiGraph([(1, 2), (2, 3), (1, 3)])
893 >>> TR = nx.transitive_reduction(DG)
894 >>> list(TR.edges)
895 [(1, 2), (2, 3)]
896
897 To avoid unnecessary data copies, this implementation does not return a
898 DiGraph with node/edge data.
899 To perform transitive reduction on a DiGraph and transfer node/edge data:
900
901 >>> DG = nx.DiGraph()
902 >>> DG.add_edges_from([(1, 2), (2, 3), (1, 3)], color="red")
903 >>> TR = nx.transitive_reduction(DG)
904 >>> TR.add_nodes_from(DG.nodes(data=True))
905 >>> TR.add_edges_from((u, v, DG.edges[u, v]) for u, v in TR.edges)
906 >>> list(TR.edges(data=True))
907 [(1, 2, {'color': 'red'}), (2, 3, {'color': 'red'})]
908
909 References
910 ----------
911 https://en.wikipedia.org/wiki/Transitive_reduction
912
913 """
914 if not is_directed_acyclic_graph(G):
915 msg = "Directed Acyclic Graph required for transitive_reduction"
916 raise nx.NetworkXError(msg)
917 TR = nx.DiGraph()
918 TR.add_nodes_from(G.nodes())
919 descendants = {}
920 # count before removing set stored in descendants
921 check_count = dict(G.in_degree)
922 for u in G:
923 u_nbrs = set(G[u])
924 for v in G[u]:
925 if v in u_nbrs:
926 if v not in descendants:
927 descendants[v] = {y for x, y in nx.dfs_edges(G, v)}
928 u_nbrs -= descendants[v]
929 check_count[v] -= 1
930 if check_count[v] == 0:
931 del descendants[v]
932 TR.add_edges_from((u, v) for v in u_nbrs)
933 return TR
934
935
936@not_implemented_for("undirected")
937@nx._dispatchable
938def antichains(G, topo_order=None):
939 """Generates antichains from a directed acyclic graph (DAG).
940
941 An antichain is a subset of a partially ordered set such that any
942 two elements in the subset are incomparable.
943
944 Parameters
945 ----------
946 G : NetworkX DiGraph
947 A directed acyclic graph (DAG)
948
949 topo_order: list or tuple, optional
950 A topological order for G (if None, the function will compute one)
951
952 Yields
953 ------
954 antichain : list
955 a list of nodes in `G` representing an antichain
956
957 Raises
958 ------
959 NetworkXNotImplemented
960 If `G` is not directed
961
962 NetworkXUnfeasible
963 If `G` contains a cycle
964
965 Examples
966 --------
967 >>> DG = nx.DiGraph([(1, 2), (1, 3)])
968 >>> list(nx.antichains(DG))
969 [[], [3], [2], [2, 3], [1]]
970
971 Notes
972 -----
973 This function was originally developed by Peter Jipsen and Franco Saliola
974 for the SAGE project. It's included in NetworkX with permission from the
975 authors. Original SAGE code at:
976
977 https://github.com/sagemath/sage/blob/master/src/sage/combinat/posets/hasse_diagram.py
978
979 References
980 ----------
981 .. [1] Free Lattices, by R. Freese, J. Jezek and J. B. Nation,
982 AMS, Vol 42, 1995, p. 226.
983 """
984 if topo_order is None:
985 topo_order = list(nx.topological_sort(G))
986
987 TC = nx.transitive_closure_dag(G, topo_order)
988 antichains_stacks = [([], list(reversed(topo_order)))]
989
990 while antichains_stacks:
991 (antichain, stack) = antichains_stacks.pop()
992 # Invariant:
993 # - the elements of antichain are independent
994 # - the elements of stack are independent from those of antichain
995 yield antichain
996 while stack:
997 x = stack.pop()
998 new_antichain = antichain + [x]
999 new_stack = [t for t in stack if not ((t in TC[x]) or (x in TC[t]))]
1000 antichains_stacks.append((new_antichain, new_stack))
1001
1002
1003@not_implemented_for("undirected")
1004@nx._dispatchable(edge_attrs={"weight": "default_weight"})
1005def dag_longest_path(G, weight="weight", default_weight=1, topo_order=None):
1006 """Returns the longest path in a directed acyclic graph (DAG).
1007
1008 If `G` has edges with `weight` attribute the edge data are used as
1009 weight values.
1010
1011 Parameters
1012 ----------
1013 G : NetworkX DiGraph
1014 A directed acyclic graph (DAG)
1015
1016 weight : str, optional
1017 Edge data key to use for weight
1018
1019 default_weight : int, optional
1020 The weight of edges that do not have a weight attribute
1021
1022 topo_order: list or tuple, optional
1023 A topological order for `G` (if None, the function will compute one)
1024
1025 Returns
1026 -------
1027 list
1028 Longest path
1029
1030 Raises
1031 ------
1032 NetworkXNotImplemented
1033 If `G` is not directed
1034
1035 Examples
1036 --------
1037 >>> DG = nx.DiGraph(
1038 ... [(0, 1, {"cost": 1}), (1, 2, {"cost": 1}), (0, 2, {"cost": 42})]
1039 ... )
1040 >>> list(nx.all_simple_paths(DG, 0, 2))
1041 [[0, 1, 2], [0, 2]]
1042 >>> nx.dag_longest_path(DG)
1043 [0, 1, 2]
1044 >>> nx.dag_longest_path(DG, weight="cost")
1045 [0, 2]
1046
1047 In the case where multiple valid topological orderings exist, `topo_order`
1048 can be used to specify a specific ordering:
1049
1050 >>> DG = nx.DiGraph([(0, 1), (0, 2)])
1051 >>> sorted(nx.all_topological_sorts(DG)) # Valid topological orderings
1052 [[0, 1, 2], [0, 2, 1]]
1053 >>> nx.dag_longest_path(DG, topo_order=[0, 1, 2])
1054 [0, 1]
1055 >>> nx.dag_longest_path(DG, topo_order=[0, 2, 1])
1056 [0, 2]
1057
1058 See also
1059 --------
1060 dag_longest_path_length
1061
1062 """
1063 if not G:
1064 return []
1065
1066 if topo_order is None:
1067 topo_order = nx.topological_sort(G)
1068
1069 dist = {} # stores {v : (length, u)}
1070 for v in topo_order:
1071 us = [
1072 (
1073 dist[u][0]
1074 + (
1075 max(data.values(), key=lambda x: x.get(weight, default_weight))
1076 if G.is_multigraph()
1077 else data
1078 ).get(weight, default_weight),
1079 u,
1080 )
1081 for u, data in G.pred[v].items()
1082 ]
1083
1084 # Use the best predecessor if there is one and its distance is
1085 # non-negative, otherwise terminate.
1086 maxu = max(us, key=lambda x: x[0]) if us else (0, v)
1087 dist[v] = maxu if maxu[0] >= 0 else (0, v)
1088
1089 u = None
1090 v = max(dist, key=lambda x: dist[x][0])
1091 path = []
1092 while u != v:
1093 path.append(v)
1094 u = v
1095 v = dist[v][1]
1096
1097 path.reverse()
1098 return path
1099
1100
1101@not_implemented_for("undirected")
1102@nx._dispatchable(edge_attrs={"weight": "default_weight"})
1103def dag_longest_path_length(G, weight="weight", default_weight=1):
1104 """Returns the longest path length in a DAG
1105
1106 Parameters
1107 ----------
1108 G : NetworkX DiGraph
1109 A directed acyclic graph (DAG)
1110
1111 weight : string, optional
1112 Edge data key to use for weight
1113
1114 default_weight : int, optional
1115 The weight of edges that do not have a weight attribute
1116
1117 Returns
1118 -------
1119 int
1120 Longest path length
1121
1122 Raises
1123 ------
1124 NetworkXNotImplemented
1125 If `G` is not directed
1126
1127 Examples
1128 --------
1129 >>> DG = nx.DiGraph(
1130 ... [(0, 1, {"cost": 1}), (1, 2, {"cost": 1}), (0, 2, {"cost": 42})]
1131 ... )
1132 >>> list(nx.all_simple_paths(DG, 0, 2))
1133 [[0, 1, 2], [0, 2]]
1134 >>> nx.dag_longest_path_length(DG)
1135 2
1136 >>> nx.dag_longest_path_length(DG, weight="cost")
1137 42
1138
1139 See also
1140 --------
1141 dag_longest_path
1142 """
1143 path = nx.dag_longest_path(G, weight, default_weight)
1144 path_length = 0
1145 if G.is_multigraph():
1146 for u, v in pairwise(path):
1147 i = max(G[u][v], key=lambda x: G[u][v][x].get(weight, default_weight))
1148 path_length += G[u][v][i].get(weight, default_weight)
1149 else:
1150 for u, v in pairwise(path):
1151 path_length += G[u][v].get(weight, default_weight)
1152
1153 return path_length
1154
1155
1156@nx._dispatchable
1157def root_to_leaf_paths(G):
1158 """Yields root-to-leaf paths in a directed acyclic graph.
1159
1160 `G` must be a directed acyclic graph. If not, the behavior of this
1161 function is undefined. A "root" in this graph is a node of in-degree
1162 zero and a "leaf" a node of out-degree zero.
1163
1164 When invoked, this function iterates over each path from any root to
1165 any leaf. A path is a list of nodes.
1166
1167 """
1168 roots = (v for v, d in G.in_degree() if d == 0)
1169 leaves = (v for v, d in G.out_degree() if d == 0)
1170
1171 for root, leaf in product(roots, leaves):
1172 yield from nx.all_simple_paths(G, root, leaf)
1173
1174
1175@not_implemented_for("multigraph")
1176@not_implemented_for("undirected")
1177@nx._dispatchable(returns_graph=True)
1178def dag_to_branching(G):
1179 """Returns a branching representing all (overlapping) paths from
1180 root nodes to leaf nodes in the given directed acyclic graph.
1181
1182 As described in :mod:`networkx.algorithms.tree.recognition`, a
1183 *branching* is a directed forest in which each node has at most one
1184 parent. In other words, a branching is a disjoint union of
1185 *arborescences*. For this function, each node of in-degree zero in
1186 `G` becomes a root of one of the arborescences, and there will be
1187 one leaf node for each distinct path from that root to a leaf node
1188 in `G`.
1189
1190 Each node `v` in `G` with *k* parents becomes *k* distinct nodes in
1191 the returned branching, one for each parent, and the sub-DAG rooted
1192 at `v` is duplicated for each copy. The algorithm then recurses on
1193 the children of each copy of `v`.
1194
1195 Parameters
1196 ----------
1197 G : NetworkX graph
1198 A directed acyclic graph.
1199
1200 Returns
1201 -------
1202 DiGraph
1203 The branching in which there is a bijection between root-to-leaf
1204 paths in `G` (in which multiple paths may share the same leaf)
1205 and root-to-leaf paths in the branching (in which there is a
1206 unique path from a root to a leaf).
1207
1208 Each node has an attribute 'source' whose value is the original
1209 node to which this node corresponds. No other graph, node, or
1210 edge attributes are copied into this new graph.
1211
1212 Raises
1213 ------
1214 NetworkXNotImplemented
1215 If `G` is not directed, or if `G` is a multigraph.
1216
1217 HasACycle
1218 If `G` is not acyclic.
1219
1220 Examples
1221 --------
1222 To examine which nodes in the returned branching were produced by
1223 which original node in the directed acyclic graph, we can collect
1224 the mapping from source node to new nodes into a dictionary. For
1225 example, consider the directed diamond graph::
1226
1227 >>> from collections import defaultdict
1228 >>> from operator import itemgetter
1229 >>>
1230 >>> G = nx.DiGraph(nx.utils.pairwise("abd"))
1231 >>> G.add_edges_from(nx.utils.pairwise("acd"))
1232 >>> B = nx.dag_to_branching(G)
1233 >>>
1234 >>> sources = defaultdict(set)
1235 >>> for v, source in B.nodes(data="source"):
1236 ... sources[source].add(v)
1237 >>> len(sources["a"])
1238 1
1239 >>> len(sources["d"])
1240 2
1241
1242 To copy node attributes from the original graph to the new graph,
1243 you can use a dictionary like the one constructed in the above
1244 example::
1245
1246 >>> for source, nodes in sources.items():
1247 ... for v in nodes:
1248 ... B.nodes[v].update(G.nodes[source])
1249
1250 Notes
1251 -----
1252 This function is not idempotent in the sense that the node labels in
1253 the returned branching may be uniquely generated each time the
1254 function is invoked. In fact, the node labels may not be integers;
1255 in order to relabel the nodes to be more readable, you can use the
1256 :func:`networkx.convert_node_labels_to_integers` function.
1257
1258 The current implementation of this function uses
1259 :func:`networkx.prefix_tree`, so it is subject to the limitations of
1260 that function.
1261
1262 """
1263 if has_cycle(G):
1264 msg = "dag_to_branching is only defined for acyclic graphs"
1265 raise nx.HasACycle(msg)
1266 paths = root_to_leaf_paths(G)
1267 B = nx.prefix_tree(paths)
1268 # Remove the synthetic `root`(0) and `NIL`(-1) nodes from the tree
1269 B.remove_node(0)
1270 B.remove_node(-1)
1271 return B
1272
1273
1274@not_implemented_for("undirected")
1275@nx._dispatchable
1276def v_structures(G):
1277 """Yields 3-node tuples that represent the v-structures in `G`.
1278
1279 Colliders are triples in the directed acyclic graph (DAG) where two parent nodes
1280 point to the same child node. V-structures are colliders where the two parent
1281 nodes are not adjacent. In a causal graph setting, the parents do not directly
1282 depend on each other, but conditioning on the child node provides an association.
1283
1284 Parameters
1285 ----------
1286 G : graph
1287 A networkx `~networkx.DiGraph`.
1288
1289 Yields
1290 ------
1291 A 3-tuple representation of a v-structure
1292 Each v-structure is a 3-tuple with the parent, collider, and other parent.
1293
1294 Raises
1295 ------
1296 NetworkXNotImplemented
1297 If `G` is an undirected graph.
1298
1299 Examples
1300 --------
1301 >>> G = nx.DiGraph([(1, 2), (0, 4), (3, 1), (2, 4), (0, 5), (4, 5), (1, 5)])
1302 >>> nx.is_directed_acyclic_graph(G)
1303 True
1304 >>> list(nx.dag.v_structures(G))
1305 [(0, 4, 2), (0, 5, 1), (4, 5, 1)]
1306
1307 See Also
1308 --------
1309 colliders
1310
1311 Notes
1312 -----
1313 This function was written to be used on DAGs, however it works on cyclic graphs
1314 too. Since colliders are referred to in the cyclic causal graph literature
1315 [2]_ we allow cyclic graphs in this function. It is suggested that you test if
1316 your input graph is acyclic as in the example if you want that property.
1317
1318 References
1319 ----------
1320 .. [1] `Pearl's PRIMER <https://bayes.cs.ucla.edu/PRIMER/primer-ch2.pdf>`_
1321 Ch-2 page 50: v-structures def.
1322 .. [2] A Hyttinen, P.O. Hoyer, F. Eberhardt, M J ̈arvisalo, (2013)
1323 "Discovering cyclic causal models with latent variables:
1324 a general SAT-based procedure", UAI'13: Proceedings of the Twenty-Ninth
1325 Conference on Uncertainty in Artificial Intelligence, pg 301–310,
1326 `doi:10.5555/3023638.3023669 <https://dl.acm.org/doi/10.5555/3023638.3023669>`_
1327 """
1328 for p1, c, p2 in colliders(G):
1329 if not (G.has_edge(p1, p2) or G.has_edge(p2, p1)):
1330 yield (p1, c, p2)
1331
1332
1333@not_implemented_for("undirected")
1334@nx._dispatchable
1335def colliders(G):
1336 """Yields 3-node tuples that represent the colliders in `G`.
1337
1338 In a Directed Acyclic Graph (DAG), if you have three nodes A, B, and C, and
1339 there are edges from A to C and from B to C, then C is a collider [1]_ . In
1340 a causal graph setting, this means that both events A and B are "causing" C,
1341 and conditioning on C provide an association between A and B even if
1342 no direct causal relationship exists between A and B.
1343
1344 Parameters
1345 ----------
1346 G : graph
1347 A networkx `~networkx.DiGraph`.
1348
1349 Yields
1350 ------
1351 A 3-tuple representation of a collider
1352 Each collider is a 3-tuple with the parent, collider, and other parent.
1353
1354 Raises
1355 ------
1356 NetworkXNotImplemented
1357 If `G` is an undirected graph.
1358
1359 Examples
1360 --------
1361 >>> G = nx.DiGraph([(1, 2), (0, 4), (3, 1), (2, 4), (0, 5), (4, 5), (1, 5)])
1362 >>> nx.is_directed_acyclic_graph(G)
1363 True
1364 >>> list(nx.dag.colliders(G))
1365 [(0, 4, 2), (0, 5, 4), (0, 5, 1), (4, 5, 1)]
1366
1367 See Also
1368 --------
1369 v_structures
1370
1371 Notes
1372 -----
1373 This function was written to be used on DAGs, however it works on cyclic graphs
1374 too. Since colliders are referred to in the cyclic causal graph literature
1375 [2]_ we allow cyclic graphs in this function. It is suggested that you test if
1376 your input graph is acyclic as in the example if you want that property.
1377
1378 References
1379 ----------
1380 .. [1] `Wikipedia: Collider in causal graphs <https://en.wikipedia.org/wiki/Collider_(statistics)>`_
1381 .. [2] A Hyttinen, P.O. Hoyer, F. Eberhardt, M J ̈arvisalo, (2013)
1382 "Discovering cyclic causal models with latent variables:
1383 a general SAT-based procedure", UAI'13: Proceedings of the Twenty-Ninth
1384 Conference on Uncertainty in Artificial Intelligence, pg 301–310,
1385 `doi:10.5555/3023638.3023669 <https://dl.acm.org/doi/10.5555/3023638.3023669>`_
1386 """
1387 for node in G.nodes:
1388 for p1, p2 in combinations(G.predecessors(node), 2):
1389 yield (p1, node, p2)