1# This module uses material from the Wikipedia article Hopcroft--Karp algorithm
2# <https://en.wikipedia.org/wiki/Hopcroft%E2%80%93Karp_algorithm>, accessed on
3# January 3, 2015, which is released under the Creative Commons
4# Attribution-Share-Alike License 3.0
5# <http://creativecommons.org/licenses/by-sa/3.0/>. That article includes
6# pseudocode, which has been translated into the corresponding Python code.
7#
8# Portions of this module use code from David Eppstein's Python Algorithms and
9# Data Structures (PADS) library, which is dedicated to the public domain (for
10# proof, see <http://www.ics.uci.edu/~eppstein/PADS/ABOUT-PADS.txt>).
11"""Provides functions for computing maximum cardinality matchings and minimum
12weight full matchings in a bipartite graph.
13
14If you don't care about the particular implementation of the maximum matching
15algorithm, simply use the :func:`maximum_matching`. If you do care, you can
16import one of the named maximum matching algorithms directly.
17
18For example, to find a maximum matching in the complete bipartite graph with
19two vertices on the left and three vertices on the right:
20
21>>> G = nx.complete_bipartite_graph(2, 3)
22>>> left, right = nx.bipartite.sets(G)
23>>> list(left)
24[0, 1]
25>>> list(right)
26[2, 3, 4]
27>>> nx.bipartite.maximum_matching(G)
28{0: 2, 1: 3, 2: 0, 3: 1}
29
30The dictionary returned by :func:`maximum_matching` includes a mapping for
31vertices in both the left and right vertex sets.
32
33Similarly, :func:`minimum_weight_full_matching` produces, for a complete
34weighted bipartite graph, a matching whose cardinality is the cardinality of
35the smaller of the two partitions, and for which the sum of the weights of the
36edges included in the matching is minimal.
37
38"""
39
40import collections
41import itertools
42
43import networkx as nx
44from networkx.algorithms.bipartite import sets as bipartite_sets
45from networkx.algorithms.bipartite.matrix import biadjacency_matrix
46
47__all__ = [
48 "maximum_matching",
49 "hopcroft_karp_matching",
50 "eppstein_matching",
51 "to_vertex_cover",
52 "minimum_weight_full_matching",
53]
54
55INFINITY = float("inf")
56
57
58@nx._dispatchable
59def hopcroft_karp_matching(G, top_nodes=None):
60 """Returns the maximum cardinality matching of the bipartite graph `G`.
61
62 A matching is a set of edges that do not share any nodes. A maximum
63 cardinality matching is a matching with the most edges possible. It
64 is not always unique. Finding a matching in a bipartite graph can be
65 treated as a networkx flow problem.
66
67 The functions ``hopcroft_karp_matching`` and ``maximum_matching``
68 are aliases of the same function.
69
70 Parameters
71 ----------
72 G : NetworkX graph
73
74 Undirected bipartite graph
75
76 top_nodes : container of nodes
77
78 Container with all nodes in one bipartite node set. If not supplied
79 it will be computed. But if more than one solution exists an exception
80 will be raised.
81
82 Returns
83 -------
84 matches : dictionary
85
86 The matching is returned as a dictionary, `matches`, such that
87 ``matches[v] == w`` if node `v` is matched to node `w`. Unmatched
88 nodes do not occur as a key in `matches`.
89
90 Raises
91 ------
92 AmbiguousSolution
93 Raised if the input bipartite graph is disconnected and no container
94 with all nodes in one bipartite set is provided. When determining
95 the nodes in each bipartite set more than one valid solution is
96 possible if the input graph is disconnected.
97
98 Notes
99 -----
100 This function is implemented with the `Hopcroft--Karp matching algorithm
101 <https://en.wikipedia.org/wiki/Hopcroft%E2%80%93Karp_algorithm>`_ for
102 bipartite graphs.
103
104 See :mod:`bipartite documentation <networkx.algorithms.bipartite>`
105 for further details on how bipartite graphs are handled in NetworkX.
106
107 See Also
108 --------
109 maximum_matching
110 hopcroft_karp_matching
111 eppstein_matching
112
113 References
114 ----------
115 .. [1] John E. Hopcroft and Richard M. Karp. "An n^{5 / 2} Algorithm for
116 Maximum Matchings in Bipartite Graphs" In: **SIAM Journal of Computing**
117 2.4 (1973), pp. 225--231. <https://doi.org/10.1137/0202019>.
118
119 """
120
121 # First we define some auxiliary search functions.
122 #
123 # If you are a human reading these auxiliary search functions, the "global"
124 # variables `leftmatches`, `rightmatches`, `distances`, etc. are defined
125 # below the functions, so that they are initialized close to the initial
126 # invocation of the search functions.
127 def breadth_first_search():
128 for v in left:
129 if leftmatches[v] is None:
130 distances[v] = 0
131 queue.append(v)
132 else:
133 distances[v] = INFINITY
134 distances[None] = INFINITY
135 while queue:
136 v = queue.popleft()
137 if distances[v] < distances[None]:
138 for u in G[v]:
139 if distances[rightmatches[u]] is INFINITY:
140 distances[rightmatches[u]] = distances[v] + 1
141 queue.append(rightmatches[u])
142 return distances[None] is not INFINITY
143
144 def depth_first_search(v):
145 if v is not None:
146 for u in G[v]:
147 if distances[rightmatches[u]] == distances[v] + 1:
148 if depth_first_search(rightmatches[u]):
149 rightmatches[u] = v
150 leftmatches[v] = u
151 return True
152 distances[v] = INFINITY
153 return False
154 return True
155
156 # Initialize the "global" variables that maintain state during the search.
157 left, right = bipartite_sets(G, top_nodes)
158 leftmatches = {v: None for v in left}
159 rightmatches = {v: None for v in right}
160 distances = {}
161 queue = collections.deque()
162
163 # Implementation note: this counter is incremented as pairs are matched but
164 # it is currently not used elsewhere in the computation.
165 num_matched_pairs = 0
166 while breadth_first_search():
167 for v in left:
168 if leftmatches[v] is None:
169 if depth_first_search(v):
170 num_matched_pairs += 1
171
172 # Strip the entries matched to `None`.
173 leftmatches = {k: v for k, v in leftmatches.items() if v is not None}
174 rightmatches = {k: v for k, v in rightmatches.items() if v is not None}
175
176 # At this point, the left matches and the right matches are inverses of one
177 # another. In other words,
178 #
179 # leftmatches == {v, k for k, v in rightmatches.items()}
180 #
181 # Finally, we combine both the left matches and right matches.
182 return dict(itertools.chain(leftmatches.items(), rightmatches.items()))
183
184
185@nx._dispatchable
186def eppstein_matching(G, top_nodes=None):
187 """Returns the maximum cardinality matching of the bipartite graph `G`.
188
189 Parameters
190 ----------
191 G : NetworkX graph
192
193 Undirected bipartite graph
194
195 top_nodes : container
196
197 Container with all nodes in one bipartite node set. If not supplied
198 it will be computed. But if more than one solution exists an exception
199 will be raised.
200
201 Returns
202 -------
203 matches : dictionary
204
205 The matching is returned as a dictionary, `matching`, such that
206 ``matching[v] == w`` if node `v` is matched to node `w`. Unmatched
207 nodes do not occur as a key in `matching`.
208
209 Raises
210 ------
211 AmbiguousSolution
212 Raised if the input bipartite graph is disconnected and no container
213 with all nodes in one bipartite set is provided. When determining
214 the nodes in each bipartite set more than one valid solution is
215 possible if the input graph is disconnected.
216
217 Notes
218 -----
219 This function is implemented with David Eppstein's version of the algorithm
220 Hopcroft--Karp algorithm (see :func:`hopcroft_karp_matching`), which
221 originally appeared in the `Python Algorithms and Data Structures library
222 (PADS) <http://www.ics.uci.edu/~eppstein/PADS/ABOUT-PADS.txt>`_.
223
224 See :mod:`bipartite documentation <networkx.algorithms.bipartite>`
225 for further details on how bipartite graphs are handled in NetworkX.
226
227 See Also
228 --------
229
230 hopcroft_karp_matching
231
232 """
233 # Due to its original implementation, a directed graph is needed
234 # so that the two sets of bipartite nodes can be distinguished
235 left, right = bipartite_sets(G, top_nodes)
236 G = nx.DiGraph(G.edges(left))
237 # initialize greedy matching (redundant, but faster than full search)
238 matching = {}
239 for u in G:
240 for v in G[u]:
241 if v not in matching:
242 matching[v] = u
243 break
244 while True:
245 # structure residual graph into layers
246 # pred[u] gives the neighbor in the previous layer for u in U
247 # preds[v] gives a list of neighbors in the previous layer for v in V
248 # unmatched gives a list of unmatched vertices in final layer of V,
249 # and is also used as a flag value for pred[u] when u is in the first
250 # layer
251 preds = {}
252 unmatched = []
253 pred = {u: unmatched for u in G}
254 for v in matching:
255 del pred[matching[v]]
256 layer = list(pred)
257
258 # repeatedly extend layering structure by another pair of layers
259 while layer and not unmatched:
260 newLayer = {}
261 for u in layer:
262 for v in G[u]:
263 if v not in preds:
264 newLayer.setdefault(v, []).append(u)
265 layer = []
266 for v in newLayer:
267 preds[v] = newLayer[v]
268 if v in matching:
269 layer.append(matching[v])
270 pred[matching[v]] = v
271 else:
272 unmatched.append(v)
273
274 # did we finish layering without finding any alternating paths?
275 if not unmatched:
276 # TODO - The lines between --- were unused and were thus commented
277 # out. This whole commented chunk should be reviewed to determine
278 # whether it should be built upon or completely removed.
279 # ---
280 # unlayered = {}
281 # for u in G:
282 # # TODO Why is extra inner loop necessary?
283 # for v in G[u]:
284 # if v not in preds:
285 # unlayered[v] = None
286 # ---
287 # TODO Originally, this function returned a three-tuple:
288 #
289 # return (matching, list(pred), list(unlayered))
290 #
291 # For some reason, the documentation for this function
292 # indicated that the second and third elements of the returned
293 # three-tuple would be the vertices in the left and right vertex
294 # sets, respectively, that are also in the maximum independent set.
295 # However, what I think the author meant was that the second
296 # element is the list of vertices that were unmatched and the third
297 # element was the list of vertices that were matched. Since that
298 # seems to be the case, they don't really need to be returned,
299 # since that information can be inferred from the matching
300 # dictionary.
301
302 # All the matched nodes must be a key in the dictionary
303 for key in matching.copy():
304 matching[matching[key]] = key
305 return matching
306
307 # recursively search backward through layers to find alternating paths
308 # recursion returns true if found path, false otherwise
309 def recurse(v):
310 if v in preds:
311 L = preds.pop(v)
312 for u in L:
313 if u in pred:
314 pu = pred.pop(u)
315 if pu is unmatched or recurse(pu):
316 matching[v] = u
317 return True
318 return False
319
320 for v in unmatched:
321 recurse(v)
322
323
324def _is_connected_by_alternating_path(G, v, matched_edges, unmatched_edges, targets):
325 """Returns True if and only if the vertex `v` is connected to one of
326 the target vertices by an alternating path in `G`.
327
328 An *alternating path* is a path in which every other edge is in the
329 specified maximum matching (and the remaining edges in the path are not in
330 the matching). An alternating path may have matched edges in the even
331 positions or in the odd positions, as long as the edges alternate between
332 'matched' and 'unmatched'.
333
334 `G` is an undirected bipartite NetworkX graph.
335
336 `v` is a vertex in `G`.
337
338 `matched_edges` is a set of edges present in a maximum matching in `G`.
339
340 `unmatched_edges` is a set of edges not present in a maximum
341 matching in `G`.
342
343 `targets` is a set of vertices.
344
345 """
346
347 def _alternating_dfs(u, along_matched=True):
348 """Returns True if and only if `u` is connected to one of the
349 targets by an alternating path.
350
351 `u` is a vertex in the graph `G`.
352
353 If `along_matched` is True, this step of the depth-first search
354 will continue only through edges in the given matching. Otherwise, it
355 will continue only through edges *not* in the given matching.
356
357 """
358 visited = set()
359 # Follow matched edges when depth is even,
360 # and follow unmatched edges when depth is odd.
361 initial_depth = 0 if along_matched else 1
362 stack = [(u, iter(G[u]), initial_depth)]
363 while stack:
364 parent, children, depth = stack[-1]
365 valid_edges = matched_edges if depth % 2 else unmatched_edges
366 try:
367 child = next(children)
368 if child not in visited:
369 if (parent, child) in valid_edges or (child, parent) in valid_edges:
370 if child in targets:
371 return True
372 visited.add(child)
373 stack.append((child, iter(G[child]), depth + 1))
374 except StopIteration:
375 stack.pop()
376 return False
377
378 # Check for alternating paths starting with edges in the matching, then
379 # check for alternating paths starting with edges not in the
380 # matching.
381 return _alternating_dfs(v, along_matched=True) or _alternating_dfs(
382 v, along_matched=False
383 )
384
385
386def _connected_by_alternating_paths(G, matching, targets):
387 """Returns the set of vertices that are connected to one of the target
388 vertices by an alternating path in `G` or are themselves a target.
389
390 An *alternating path* is a path in which every other edge is in the
391 specified maximum matching (and the remaining edges in the path are not in
392 the matching). An alternating path may have matched edges in the even
393 positions or in the odd positions, as long as the edges alternate between
394 'matched' and 'unmatched'.
395
396 `G` is an undirected bipartite NetworkX graph.
397
398 `matching` is a dictionary representing a maximum matching in `G`, as
399 returned by, for example, :func:`maximum_matching`.
400
401 `targets` is a set of vertices.
402
403 """
404 # Get the set of matched edges and the set of unmatched edges. Only include
405 # one version of each undirected edge (for example, include edge (1, 2) but
406 # not edge (2, 1)). Using frozensets as an intermediary step we do not
407 # require nodes to be orderable.
408 edge_sets = {frozenset((u, v)) for u, v in matching.items()}
409 matched_edges = {tuple(edge) for edge in edge_sets}
410 unmatched_edges = {
411 (u, v) for (u, v) in G.edges() if frozenset((u, v)) not in edge_sets
412 }
413
414 return {
415 v
416 for v in G
417 if v in targets
418 or _is_connected_by_alternating_path(
419 G, v, matched_edges, unmatched_edges, targets
420 )
421 }
422
423
424@nx._dispatchable
425def to_vertex_cover(G, matching, top_nodes=None):
426 """Returns the minimum vertex cover corresponding to the given maximum
427 matching of the bipartite graph `G`.
428
429 Parameters
430 ----------
431 G : NetworkX graph
432
433 Undirected bipartite graph
434
435 matching : dictionary
436
437 A dictionary whose keys are vertices in `G` and whose values are the
438 distinct neighbors comprising the maximum matching for `G`, as returned
439 by, for example, :func:`maximum_matching`. The dictionary *must*
440 represent the maximum matching.
441
442 top_nodes : container
443
444 Container with all nodes in one bipartite node set. If not supplied
445 it will be computed. But if more than one solution exists an exception
446 will be raised.
447
448 Returns
449 -------
450 vertex_cover : :class:`set`
451
452 The minimum vertex cover in `G`.
453
454 Raises
455 ------
456 AmbiguousSolution
457 Raised if the input bipartite graph is disconnected and no container
458 with all nodes in one bipartite set is provided. When determining
459 the nodes in each bipartite set more than one valid solution is
460 possible if the input graph is disconnected.
461
462 Notes
463 -----
464 This function is implemented using the procedure guaranteed by `Konig's
465 theorem
466 <https://en.wikipedia.org/wiki/K%C3%B6nig%27s_theorem_%28graph_theory%29>`_,
467 which proves an equivalence between a maximum matching and a minimum vertex
468 cover in bipartite graphs.
469
470 Since a minimum vertex cover is the complement of a maximum independent set
471 for any graph, one can compute the maximum independent set of a bipartite
472 graph this way:
473
474 >>> G = nx.complete_bipartite_graph(2, 3)
475 >>> matching = nx.bipartite.maximum_matching(G)
476 >>> vertex_cover = nx.bipartite.to_vertex_cover(G, matching)
477 >>> independent_set = set(G) - vertex_cover
478 >>> print(list(independent_set))
479 [2, 3, 4]
480
481 See :mod:`bipartite documentation <networkx.algorithms.bipartite>`
482 for further details on how bipartite graphs are handled in NetworkX.
483
484 """
485 # This is a Python implementation of the algorithm described at
486 # <https://en.wikipedia.org/wiki/K%C3%B6nig%27s_theorem_%28graph_theory%29#Proof>.
487 L, R = bipartite_sets(G, top_nodes)
488 # Let U be the set of unmatched vertices in the left vertex set.
489 unmatched_vertices = set(G) - set(matching)
490 U = unmatched_vertices & L
491 # Let Z be the set of vertices that are either in U or are connected to U
492 # by alternating paths.
493 Z = _connected_by_alternating_paths(G, matching, U)
494 # At this point, every edge either has a right endpoint in Z or a left
495 # endpoint not in Z. This gives us the vertex cover.
496 return (L - Z) | (R & Z)
497
498
499#: Returns the maximum cardinality matching in the given bipartite graph.
500#:
501#: This function is simply an alias for :func:`hopcroft_karp_matching`.
502maximum_matching = hopcroft_karp_matching
503
504
505@nx._dispatchable(edge_attrs="weight")
506def minimum_weight_full_matching(G, top_nodes=None, weight="weight"):
507 r"""Returns a minimum weight full matching of the bipartite graph `G`.
508
509 Let :math:`G = ((U, V), E)` be a weighted bipartite graph with real weights
510 :math:`w : E \to \mathbb{R}`. This function then produces a matching
511 :math:`M \subseteq E` with cardinality
512
513 .. math::
514 \lvert M \rvert = \min(\lvert U \rvert, \lvert V \rvert),
515
516 which minimizes the sum of the weights of the edges included in the
517 matching, :math:`\sum_{e \in M} w(e)`, or raises an error if no such
518 matching exists.
519
520 When :math:`\lvert U \rvert = \lvert V \rvert`, this is commonly
521 referred to as a perfect matching; here, since we allow
522 :math:`\lvert U \rvert` and :math:`\lvert V \rvert` to differ, we
523 follow Karp [1]_ and refer to the matching as *full*.
524
525 Parameters
526 ----------
527 G : NetworkX graph
528
529 Undirected bipartite graph
530
531 top_nodes : container
532
533 Container with all nodes in one bipartite node set. If not supplied
534 it will be computed.
535
536 weight : string, optional (default='weight')
537
538 The edge data key used to provide each value in the matrix.
539 If None, then each edge has weight 1.
540
541 Returns
542 -------
543 matches : dictionary
544
545 The matching is returned as a dictionary, `matches`, such that
546 ``matches[v] == w`` if node `v` is matched to node `w`. Unmatched
547 nodes do not occur as a key in `matches`.
548
549 Raises
550 ------
551 ValueError
552 Raised if no full matching exists.
553
554 ImportError
555 Raised if SciPy is not available.
556
557 Notes
558 -----
559 The problem of determining a minimum weight full matching is also known as
560 the rectangular linear assignment problem. This implementation defers the
561 calculation of the assignment to SciPy.
562
563 References
564 ----------
565 .. [1] Richard Manning Karp:
566 An algorithm to Solve the m x n Assignment Problem in Expected Time
567 O(mn log n).
568 Networks, 10(2):143–152, 1980.
569
570 """
571 import numpy as np
572 import scipy as sp
573
574 left, right = nx.bipartite.sets(G, top_nodes)
575 U = list(left)
576 V = list(right)
577 # We explicitly create the biadjacency matrix having infinities
578 # where edges are missing (as opposed to zeros, which is what one would
579 # get by using toarray on the sparse matrix).
580 weights_sparse = biadjacency_matrix(
581 G, row_order=U, column_order=V, weight=weight, format="coo"
582 )
583 weights = np.full(weights_sparse.shape, np.inf)
584 weights[weights_sparse.row, weights_sparse.col] = weights_sparse.data
585 left_matches = sp.optimize.linear_sum_assignment(weights)
586 d = {U[u]: V[v] for u, v in zip(*left_matches)}
587 # d will contain the matching from edges in left to right; we need to
588 # add the ones from right to left as well.
589 d.update({v: u for u, v in d.items()})
590 return d