Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/networkx/algorithms/bipartite/matching.py: 13%

142 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-20 07:00 +0000

1# This module uses material from the Wikipedia article Hopcroft--Karp algorithm 

2# <https://en.wikipedia.org/wiki/Hopcroft%E2%80%93Karp_algorithm>, accessed on 

3# January 3, 2015, which is released under the Creative Commons 

4# Attribution-Share-Alike License 3.0 

5# <http://creativecommons.org/licenses/by-sa/3.0/>. That article includes 

6# pseudocode, which has been translated into the corresponding Python code. 

7# 

8# Portions of this module use code from David Eppstein's Python Algorithms and 

9# Data Structures (PADS) library, which is dedicated to the public domain (for 

10# proof, see <http://www.ics.uci.edu/~eppstein/PADS/ABOUT-PADS.txt>). 

11"""Provides functions for computing maximum cardinality matchings and minimum 

12weight full matchings in a bipartite graph. 

13 

14If you don't care about the particular implementation of the maximum matching 

15algorithm, simply use the :func:`maximum_matching`. If you do care, you can 

16import one of the named maximum matching algorithms directly. 

17 

18For example, to find a maximum matching in the complete bipartite graph with 

19two vertices on the left and three vertices on the right: 

20 

21>>> G = nx.complete_bipartite_graph(2, 3) 

22>>> left, right = nx.bipartite.sets(G) 

23>>> list(left) 

24[0, 1] 

25>>> list(right) 

26[2, 3, 4] 

27>>> nx.bipartite.maximum_matching(G) 

28{0: 2, 1: 3, 2: 0, 3: 1} 

29 

30The dictionary returned by :func:`maximum_matching` includes a mapping for 

31vertices in both the left and right vertex sets. 

32 

33Similarly, :func:`minimum_weight_full_matching` produces, for a complete 

34weighted bipartite graph, a matching whose cardinality is the cardinality of 

35the smaller of the two partitions, and for which the sum of the weights of the 

36edges included in the matching is minimal. 

37 

38""" 

39import collections 

40import itertools 

41 

42import networkx as nx 

43from networkx.algorithms.bipartite import sets as bipartite_sets 

44from networkx.algorithms.bipartite.matrix import biadjacency_matrix 

45 

46__all__ = [ 

47 "maximum_matching", 

48 "hopcroft_karp_matching", 

49 "eppstein_matching", 

50 "to_vertex_cover", 

51 "minimum_weight_full_matching", 

52] 

53 

54INFINITY = float("inf") 

55 

56 

57@nx._dispatch 

58def hopcroft_karp_matching(G, top_nodes=None): 

59 """Returns the maximum cardinality matching of the bipartite graph `G`. 

60 

61 A matching is a set of edges that do not share any nodes. A maximum 

62 cardinality matching is a matching with the most edges possible. It 

63 is not always unique. Finding a matching in a bipartite graph can be 

64 treated as a networkx flow problem. 

65 

66 The functions ``hopcroft_karp_matching`` and ``maximum_matching`` 

67 are aliases of the same function. 

68 

69 Parameters 

70 ---------- 

71 G : NetworkX graph 

72 

73 Undirected bipartite graph 

74 

75 top_nodes : container of nodes 

76 

77 Container with all nodes in one bipartite node set. If not supplied 

78 it will be computed. But if more than one solution exists an exception 

79 will be raised. 

80 

81 Returns 

82 ------- 

83 matches : dictionary 

84 

85 The matching is returned as a dictionary, `matches`, such that 

86 ``matches[v] == w`` if node `v` is matched to node `w`. Unmatched 

87 nodes do not occur as a key in `matches`. 

88 

89 Raises 

90 ------ 

91 AmbiguousSolution 

92 Raised if the input bipartite graph is disconnected and no container 

93 with all nodes in one bipartite set is provided. When determining 

94 the nodes in each bipartite set more than one valid solution is 

95 possible if the input graph is disconnected. 

96 

97 Notes 

98 ----- 

99 This function is implemented with the `Hopcroft--Karp matching algorithm 

100 <https://en.wikipedia.org/wiki/Hopcroft%E2%80%93Karp_algorithm>`_ for 

101 bipartite graphs. 

102 

103 See :mod:`bipartite documentation <networkx.algorithms.bipartite>` 

104 for further details on how bipartite graphs are handled in NetworkX. 

105 

106 See Also 

107 -------- 

108 maximum_matching 

109 hopcroft_karp_matching 

110 eppstein_matching 

111 

112 References 

113 ---------- 

114 .. [1] John E. Hopcroft and Richard M. Karp. "An n^{5 / 2} Algorithm for 

115 Maximum Matchings in Bipartite Graphs" In: **SIAM Journal of Computing** 

116 2.4 (1973), pp. 225--231. <https://doi.org/10.1137/0202019>. 

117 

118 """ 

119 

120 # First we define some auxiliary search functions. 

121 # 

122 # If you are a human reading these auxiliary search functions, the "global" 

123 # variables `leftmatches`, `rightmatches`, `distances`, etc. are defined 

124 # below the functions, so that they are initialized close to the initial 

125 # invocation of the search functions. 

126 def breadth_first_search(): 

127 for v in left: 

128 if leftmatches[v] is None: 

129 distances[v] = 0 

130 queue.append(v) 

131 else: 

132 distances[v] = INFINITY 

133 distances[None] = INFINITY 

134 while queue: 

135 v = queue.popleft() 

136 if distances[v] < distances[None]: 

137 for u in G[v]: 

138 if distances[rightmatches[u]] is INFINITY: 

139 distances[rightmatches[u]] = distances[v] + 1 

140 queue.append(rightmatches[u]) 

141 return distances[None] is not INFINITY 

142 

143 def depth_first_search(v): 

144 if v is not None: 

145 for u in G[v]: 

146 if distances[rightmatches[u]] == distances[v] + 1: 

147 if depth_first_search(rightmatches[u]): 

148 rightmatches[u] = v 

149 leftmatches[v] = u 

150 return True 

151 distances[v] = INFINITY 

152 return False 

153 return True 

154 

155 # Initialize the "global" variables that maintain state during the search. 

156 left, right = bipartite_sets(G, top_nodes) 

157 leftmatches = {v: None for v in left} 

158 rightmatches = {v: None for v in right} 

159 distances = {} 

160 queue = collections.deque() 

161 

162 # Implementation note: this counter is incremented as pairs are matched but 

163 # it is currently not used elsewhere in the computation. 

164 num_matched_pairs = 0 

165 while breadth_first_search(): 

166 for v in left: 

167 if leftmatches[v] is None: 

168 if depth_first_search(v): 

169 num_matched_pairs += 1 

170 

171 # Strip the entries matched to `None`. 

172 leftmatches = {k: v for k, v in leftmatches.items() if v is not None} 

173 rightmatches = {k: v for k, v in rightmatches.items() if v is not None} 

174 

175 # At this point, the left matches and the right matches are inverses of one 

176 # another. In other words, 

177 # 

178 # leftmatches == {v, k for k, v in rightmatches.items()} 

179 # 

180 # Finally, we combine both the left matches and right matches. 

181 return dict(itertools.chain(leftmatches.items(), rightmatches.items())) 

182 

183 

184@nx._dispatch 

185def eppstein_matching(G, top_nodes=None): 

186 """Returns the maximum cardinality matching of the bipartite graph `G`. 

187 

188 Parameters 

189 ---------- 

190 G : NetworkX graph 

191 

192 Undirected bipartite graph 

193 

194 top_nodes : container 

195 

196 Container with all nodes in one bipartite node set. If not supplied 

197 it will be computed. But if more than one solution exists an exception 

198 will be raised. 

199 

200 Returns 

201 ------- 

202 matches : dictionary 

203 

204 The matching is returned as a dictionary, `matching`, such that 

205 ``matching[v] == w`` if node `v` is matched to node `w`. Unmatched 

206 nodes do not occur as a key in `matching`. 

207 

208 Raises 

209 ------ 

210 AmbiguousSolution 

211 Raised if the input bipartite graph is disconnected and no container 

212 with all nodes in one bipartite set is provided. When determining 

213 the nodes in each bipartite set more than one valid solution is 

214 possible if the input graph is disconnected. 

215 

216 Notes 

217 ----- 

218 This function is implemented with David Eppstein's version of the algorithm 

219 Hopcroft--Karp algorithm (see :func:`hopcroft_karp_matching`), which 

220 originally appeared in the `Python Algorithms and Data Structures library 

221 (PADS) <http://www.ics.uci.edu/~eppstein/PADS/ABOUT-PADS.txt>`_. 

222 

223 See :mod:`bipartite documentation <networkx.algorithms.bipartite>` 

224 for further details on how bipartite graphs are handled in NetworkX. 

225 

226 See Also 

227 -------- 

228 

229 hopcroft_karp_matching 

230 

231 """ 

232 # Due to its original implementation, a directed graph is needed 

233 # so that the two sets of bipartite nodes can be distinguished 

234 left, right = bipartite_sets(G, top_nodes) 

235 G = nx.DiGraph(G.edges(left)) 

236 # initialize greedy matching (redundant, but faster than full search) 

237 matching = {} 

238 for u in G: 

239 for v in G[u]: 

240 if v not in matching: 

241 matching[v] = u 

242 break 

243 while True: 

244 # structure residual graph into layers 

245 # pred[u] gives the neighbor in the previous layer for u in U 

246 # preds[v] gives a list of neighbors in the previous layer for v in V 

247 # unmatched gives a list of unmatched vertices in final layer of V, 

248 # and is also used as a flag value for pred[u] when u is in the first 

249 # layer 

250 preds = {} 

251 unmatched = [] 

252 pred = {u: unmatched for u in G} 

253 for v in matching: 

254 del pred[matching[v]] 

255 layer = list(pred) 

256 

257 # repeatedly extend layering structure by another pair of layers 

258 while layer and not unmatched: 

259 newLayer = {} 

260 for u in layer: 

261 for v in G[u]: 

262 if v not in preds: 

263 newLayer.setdefault(v, []).append(u) 

264 layer = [] 

265 for v in newLayer: 

266 preds[v] = newLayer[v] 

267 if v in matching: 

268 layer.append(matching[v]) 

269 pred[matching[v]] = v 

270 else: 

271 unmatched.append(v) 

272 

273 # did we finish layering without finding any alternating paths? 

274 if not unmatched: 

275 # TODO - The lines between --- were unused and were thus commented 

276 # out. This whole commented chunk should be reviewed to determine 

277 # whether it should be built upon or completely removed. 

278 # --- 

279 # unlayered = {} 

280 # for u in G: 

281 # # TODO Why is extra inner loop necessary? 

282 # for v in G[u]: 

283 # if v not in preds: 

284 # unlayered[v] = None 

285 # --- 

286 # TODO Originally, this function returned a three-tuple: 

287 # 

288 # return (matching, list(pred), list(unlayered)) 

289 # 

290 # For some reason, the documentation for this function 

291 # indicated that the second and third elements of the returned 

292 # three-tuple would be the vertices in the left and right vertex 

293 # sets, respectively, that are also in the maximum independent set. 

294 # However, what I think the author meant was that the second 

295 # element is the list of vertices that were unmatched and the third 

296 # element was the list of vertices that were matched. Since that 

297 # seems to be the case, they don't really need to be returned, 

298 # since that information can be inferred from the matching 

299 # dictionary. 

300 

301 # All the matched nodes must be a key in the dictionary 

302 for key in matching.copy(): 

303 matching[matching[key]] = key 

304 return matching 

305 

306 # recursively search backward through layers to find alternating paths 

307 # recursion returns true if found path, false otherwise 

308 def recurse(v): 

309 if v in preds: 

310 L = preds.pop(v) 

311 for u in L: 

312 if u in pred: 

313 pu = pred.pop(u) 

314 if pu is unmatched or recurse(pu): 

315 matching[v] = u 

316 return True 

317 return False 

318 

319 for v in unmatched: 

320 recurse(v) 

321 

322 

323def _is_connected_by_alternating_path(G, v, matched_edges, unmatched_edges, targets): 

324 """Returns True if and only if the vertex `v` is connected to one of 

325 the target vertices by an alternating path in `G`. 

326 

327 An *alternating path* is a path in which every other edge is in the 

328 specified maximum matching (and the remaining edges in the path are not in 

329 the matching). An alternating path may have matched edges in the even 

330 positions or in the odd positions, as long as the edges alternate between 

331 'matched' and 'unmatched'. 

332 

333 `G` is an undirected bipartite NetworkX graph. 

334 

335 `v` is a vertex in `G`. 

336 

337 `matched_edges` is a set of edges present in a maximum matching in `G`. 

338 

339 `unmatched_edges` is a set of edges not present in a maximum 

340 matching in `G`. 

341 

342 `targets` is a set of vertices. 

343 

344 """ 

345 

346 def _alternating_dfs(u, along_matched=True): 

347 """Returns True if and only if `u` is connected to one of the 

348 targets by an alternating path. 

349 

350 `u` is a vertex in the graph `G`. 

351 

352 If `along_matched` is True, this step of the depth-first search 

353 will continue only through edges in the given matching. Otherwise, it 

354 will continue only through edges *not* in the given matching. 

355 

356 """ 

357 visited = set() 

358 # Follow matched edges when depth is even, 

359 # and follow unmatched edges when depth is odd. 

360 initial_depth = 0 if along_matched else 1 

361 stack = [(u, iter(G[u]), initial_depth)] 

362 while stack: 

363 parent, children, depth = stack[-1] 

364 valid_edges = matched_edges if depth % 2 else unmatched_edges 

365 try: 

366 child = next(children) 

367 if child not in visited: 

368 if (parent, child) in valid_edges or (child, parent) in valid_edges: 

369 if child in targets: 

370 return True 

371 visited.add(child) 

372 stack.append((child, iter(G[child]), depth + 1)) 

373 except StopIteration: 

374 stack.pop() 

375 return False 

376 

377 # Check for alternating paths starting with edges in the matching, then 

378 # check for alternating paths starting with edges not in the 

379 # matching. 

380 return _alternating_dfs(v, along_matched=True) or _alternating_dfs( 

381 v, along_matched=False 

382 ) 

383 

384 

385def _connected_by_alternating_paths(G, matching, targets): 

386 """Returns the set of vertices that are connected to one of the target 

387 vertices by an alternating path in `G` or are themselves a target. 

388 

389 An *alternating path* is a path in which every other edge is in the 

390 specified maximum matching (and the remaining edges in the path are not in 

391 the matching). An alternating path may have matched edges in the even 

392 positions or in the odd positions, as long as the edges alternate between 

393 'matched' and 'unmatched'. 

394 

395 `G` is an undirected bipartite NetworkX graph. 

396 

397 `matching` is a dictionary representing a maximum matching in `G`, as 

398 returned by, for example, :func:`maximum_matching`. 

399 

400 `targets` is a set of vertices. 

401 

402 """ 

403 # Get the set of matched edges and the set of unmatched edges. Only include 

404 # one version of each undirected edge (for example, include edge (1, 2) but 

405 # not edge (2, 1)). Using frozensets as an intermediary step we do not 

406 # require nodes to be orderable. 

407 edge_sets = {frozenset((u, v)) for u, v in matching.items()} 

408 matched_edges = {tuple(edge) for edge in edge_sets} 

409 unmatched_edges = { 

410 (u, v) for (u, v) in G.edges() if frozenset((u, v)) not in edge_sets 

411 } 

412 

413 return { 

414 v 

415 for v in G 

416 if v in targets 

417 or _is_connected_by_alternating_path( 

418 G, v, matched_edges, unmatched_edges, targets 

419 ) 

420 } 

421 

422 

423@nx._dispatch 

424def to_vertex_cover(G, matching, top_nodes=None): 

425 """Returns the minimum vertex cover corresponding to the given maximum 

426 matching of the bipartite graph `G`. 

427 

428 Parameters 

429 ---------- 

430 G : NetworkX graph 

431 

432 Undirected bipartite graph 

433 

434 matching : dictionary 

435 

436 A dictionary whose keys are vertices in `G` and whose values are the 

437 distinct neighbors comprising the maximum matching for `G`, as returned 

438 by, for example, :func:`maximum_matching`. The dictionary *must* 

439 represent the maximum matching. 

440 

441 top_nodes : container 

442 

443 Container with all nodes in one bipartite node set. If not supplied 

444 it will be computed. But if more than one solution exists an exception 

445 will be raised. 

446 

447 Returns 

448 ------- 

449 vertex_cover : :class:`set` 

450 

451 The minimum vertex cover in `G`. 

452 

453 Raises 

454 ------ 

455 AmbiguousSolution 

456 Raised if the input bipartite graph is disconnected and no container 

457 with all nodes in one bipartite set is provided. When determining 

458 the nodes in each bipartite set more than one valid solution is 

459 possible if the input graph is disconnected. 

460 

461 Notes 

462 ----- 

463 This function is implemented using the procedure guaranteed by `Konig's 

464 theorem 

465 <https://en.wikipedia.org/wiki/K%C3%B6nig%27s_theorem_%28graph_theory%29>`_, 

466 which proves an equivalence between a maximum matching and a minimum vertex 

467 cover in bipartite graphs. 

468 

469 Since a minimum vertex cover is the complement of a maximum independent set 

470 for any graph, one can compute the maximum independent set of a bipartite 

471 graph this way: 

472 

473 >>> G = nx.complete_bipartite_graph(2, 3) 

474 >>> matching = nx.bipartite.maximum_matching(G) 

475 >>> vertex_cover = nx.bipartite.to_vertex_cover(G, matching) 

476 >>> independent_set = set(G) - vertex_cover 

477 >>> print(list(independent_set)) 

478 [2, 3, 4] 

479 

480 See :mod:`bipartite documentation <networkx.algorithms.bipartite>` 

481 for further details on how bipartite graphs are handled in NetworkX. 

482 

483 """ 

484 # This is a Python implementation of the algorithm described at 

485 # <https://en.wikipedia.org/wiki/K%C3%B6nig%27s_theorem_%28graph_theory%29#Proof>. 

486 L, R = bipartite_sets(G, top_nodes) 

487 # Let U be the set of unmatched vertices in the left vertex set. 

488 unmatched_vertices = set(G) - set(matching) 

489 U = unmatched_vertices & L 

490 # Let Z be the set of vertices that are either in U or are connected to U 

491 # by alternating paths. 

492 Z = _connected_by_alternating_paths(G, matching, U) 

493 # At this point, every edge either has a right endpoint in Z or a left 

494 # endpoint not in Z. This gives us the vertex cover. 

495 return (L - Z) | (R & Z) 

496 

497 

498#: Returns the maximum cardinality matching in the given bipartite graph. 

499#: 

500#: This function is simply an alias for :func:`hopcroft_karp_matching`. 

501maximum_matching = hopcroft_karp_matching 

502 

503 

504@nx._dispatch(edge_attrs="weight") 

505def minimum_weight_full_matching(G, top_nodes=None, weight="weight"): 

506 r"""Returns a minimum weight full matching of the bipartite graph `G`. 

507 

508 Let :math:`G = ((U, V), E)` be a weighted bipartite graph with real weights 

509 :math:`w : E \to \mathbb{R}`. This function then produces a matching 

510 :math:`M \subseteq E` with cardinality 

511 

512 .. math:: 

513 \lvert M \rvert = \min(\lvert U \rvert, \lvert V \rvert), 

514 

515 which minimizes the sum of the weights of the edges included in the 

516 matching, :math:`\sum_{e \in M} w(e)`, or raises an error if no such 

517 matching exists. 

518 

519 When :math:`\lvert U \rvert = \lvert V \rvert`, this is commonly 

520 referred to as a perfect matching; here, since we allow 

521 :math:`\lvert U \rvert` and :math:`\lvert V \rvert` to differ, we 

522 follow Karp [1]_ and refer to the matching as *full*. 

523 

524 Parameters 

525 ---------- 

526 G : NetworkX graph 

527 

528 Undirected bipartite graph 

529 

530 top_nodes : container 

531 

532 Container with all nodes in one bipartite node set. If not supplied 

533 it will be computed. 

534 

535 weight : string, optional (default='weight') 

536 

537 The edge data key used to provide each value in the matrix. 

538 If None, then each edge has weight 1. 

539 

540 Returns 

541 ------- 

542 matches : dictionary 

543 

544 The matching is returned as a dictionary, `matches`, such that 

545 ``matches[v] == w`` if node `v` is matched to node `w`. Unmatched 

546 nodes do not occur as a key in `matches`. 

547 

548 Raises 

549 ------ 

550 ValueError 

551 Raised if no full matching exists. 

552 

553 ImportError 

554 Raised if SciPy is not available. 

555 

556 Notes 

557 ----- 

558 The problem of determining a minimum weight full matching is also known as 

559 the rectangular linear assignment problem. This implementation defers the 

560 calculation of the assignment to SciPy. 

561 

562 References 

563 ---------- 

564 .. [1] Richard Manning Karp: 

565 An algorithm to Solve the m x n Assignment Problem in Expected Time 

566 O(mn log n). 

567 Networks, 10(2):143–152, 1980. 

568 

569 """ 

570 import numpy as np 

571 import scipy as sp 

572 

573 left, right = nx.bipartite.sets(G, top_nodes) 

574 U = list(left) 

575 V = list(right) 

576 # We explicitly create the biadjacency matrix having infinities 

577 # where edges are missing (as opposed to zeros, which is what one would 

578 # get by using toarray on the sparse matrix). 

579 weights_sparse = biadjacency_matrix( 

580 G, row_order=U, column_order=V, weight=weight, format="coo" 

581 ) 

582 weights = np.full(weights_sparse.shape, np.inf) 

583 weights[weights_sparse.row, weights_sparse.col] = weights_sparse.data 

584 left_matches = sp.optimize.linear_sum_assignment(weights) 

585 d = {U[u]: V[v] for u, v in zip(*left_matches)} 

586 # d will contain the matching from edges in left to right; we need to 

587 # add the ones from right to left as well. 

588 d.update({v: u for u, v in d.items()}) 

589 return d