Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/networkx/algorithms/bipartite/matching.py: 13%

1# This module uses material from the Wikipedia article Hopcroft--Karp algorithm

2# <https://en.wikipedia.org/wiki/Hopcroft%E2%80%93Karp_algorithm>, accessed on

3# January 3, 2015, which is released under the Creative Commons

4# Attribution-Share-Alike License 3.0

5# <http://creativecommons.org/licenses/by-sa/3.0/>. That article includes

6# pseudocode, which has been translated into the corresponding Python code.

8# Portions of this module use code from David Eppstein's Python Algorithms and

9# Data Structures (PADS) library, which is dedicated to the public domain (for

10# proof, see <http://www.ics.uci.edu/~eppstein/PADS/ABOUT-PADS.txt>).

11"""Provides functions for computing maximum cardinality matchings and minimum

12weight full matchings in a bipartite graph.

14If you don't care about the particular implementation of the maximum matching

15algorithm, simply use the :func:`maximum_matching`. If you do care, you can

16import one of the named maximum matching algorithms directly.

18For example, to find a maximum matching in the complete bipartite graph with

19two vertices on the left and three vertices on the right:

21>>> G = nx.complete_bipartite_graph(2, 3)

22>>> left, right = nx.bipartite.sets(G)

23>>> list(left)

24[0, 1]

25>>> list(right)

26[2, 3, 4]

27>>> nx.bipartite.maximum_matching(G)

28{0: 2, 1: 3, 2: 0, 3: 1}

30The dictionary returned by :func:`maximum_matching` includes a mapping for

31vertices in both the left and right vertex sets.

33Similarly, :func:`minimum_weight_full_matching` produces, for a complete

34weighted bipartite graph, a matching whose cardinality is the cardinality of

35the smaller of the two partitions, and for which the sum of the weights of the

36edges included in the matching is minimal.

38"""

40import collections

41import itertools

43import networkx as nx

44from networkx.algorithms.bipartite import sets as bipartite_sets

45from networkx.algorithms.bipartite.matrix import biadjacency_matrix

47__all__ = [

48 "maximum_matching",

49 "hopcroft_karp_matching",

50 "eppstein_matching",

51 "to_vertex_cover",

52 "minimum_weight_full_matching",

53]

55INFINITY = float("inf")

58@nx._dispatchable

59def hopcroft_karp_matching(G, top_nodes=None):

60 """Returns the maximum cardinality matching of the bipartite graph `G`.

62 A matching is a set of edges that do not share any nodes. A maximum

63 cardinality matching is a matching with the most edges possible. It

64 is not always unique. Finding a matching in a bipartite graph can be

65 treated as a networkx flow problem.

67 The functions ``hopcroft_karp_matching`` and ``maximum_matching``

68 are aliases of the same function.

70 Parameters

71 ----------

72 G : NetworkX graph

74 Undirected bipartite graph

76 top_nodes : container of nodes

78 Container with all nodes in one bipartite node set. If not supplied

79 it will be computed. But if more than one solution exists an exception

80 will be raised.

82 Returns

83 -------

84 matches : dictionary

86 The matching is returned as a dictionary, `matches`, such that

87 ``matches[v] == w`` if node `v` is matched to node `w`. Unmatched

88 nodes do not occur as a key in `matches`.

90 Raises

91 ------

92 AmbiguousSolution

93 Raised if the input bipartite graph is disconnected and no container

94 with all nodes in one bipartite set is provided. When determining

95 the nodes in each bipartite set more than one valid solution is

96 possible if the input graph is disconnected.

98 Notes

99 -----

100 This function is implemented with the `Hopcroft--Karp matching algorithm

101 <https://en.wikipedia.org/wiki/Hopcroft%E2%80%93Karp_algorithm>`_ for

102 bipartite graphs.

103

104 See :mod:`bipartite documentation <networkx.algorithms.bipartite>`

105 for further details on how bipartite graphs are handled in NetworkX.

106

107 See Also

108 --------

109 maximum_matching

110 hopcroft_karp_matching

111 eppstein_matching

112

113 References

114 ----------

115 .. [1] John E. Hopcroft and Richard M. Karp. "An n^{5 / 2} Algorithm for

116 Maximum Matchings in Bipartite Graphs" In: **SIAM Journal of Computing**

117 2.4 (1973), pp. 225--231. <https://doi.org/10.1137/0202019>.

118

119 """

120

121 # First we define some auxiliary search functions.

122 #

123 # If you are a human reading these auxiliary search functions, the "global"

124 # variables `leftmatches`, `rightmatches`, `distances`, etc. are defined

125 # below the functions, so that they are initialized close to the initial

126 # invocation of the search functions.

127 def breadth_first_search():

128 for v in left:

129 if leftmatches[v] is None:

130 distances[v] = 0

131 queue.append(v)

132 else:

133 distances[v] = INFINITY

134 distances[None] = INFINITY

135 while queue:

136 v = queue.popleft()

137 if distances[v] < distances[None]:

138 for u in G[v]:

139 if distances[rightmatches[u]] is INFINITY:

140 distances[rightmatches[u]] = distances[v] + 1

141 queue.append(rightmatches[u])

142 return distances[None] is not INFINITY

143

144 def depth_first_search(v):

145 if v is not None:

146 for u in G[v]:

147 if distances[rightmatches[u]] == distances[v] + 1:

148 if depth_first_search(rightmatches[u]):

149 rightmatches[u] = v

150 leftmatches[v] = u

151 return True

152 distances[v] = INFINITY

153 return False

154 return True

155

156 # Initialize the "global" variables that maintain state during the search.

157 left, right = bipartite_sets(G, top_nodes)

158 leftmatches = {v: None for v in left}

159 rightmatches = {v: None for v in right}

160 distances = {}

161 queue = collections.deque()

162

163 # Implementation note: this counter is incremented as pairs are matched but

164 # it is currently not used elsewhere in the computation.

165 num_matched_pairs = 0

166 while breadth_first_search():

167 for v in left:

168 if leftmatches[v] is None:

169 if depth_first_search(v):

170 num_matched_pairs += 1

171

172 # Strip the entries matched to `None`.

173 leftmatches = {k: v for k, v in leftmatches.items() if v is not None}

174 rightmatches = {k: v for k, v in rightmatches.items() if v is not None}

175

176 # At this point, the left matches and the right matches are inverses of one

177 # another. In other words,

178 #

179 # leftmatches == {v, k for k, v in rightmatches.items()}

180 #

181 # Finally, we combine both the left matches and right matches.

182 return dict(itertools.chain(leftmatches.items(), rightmatches.items()))

183

184

185@nx._dispatchable

186def eppstein_matching(G, top_nodes=None):

187 """Returns the maximum cardinality matching of the bipartite graph `G`.

188

189 Parameters

190 ----------

191 G : NetworkX graph

192

193 Undirected bipartite graph

194

195 top_nodes : container

196

197 Container with all nodes in one bipartite node set. If not supplied

198 it will be computed. But if more than one solution exists an exception

199 will be raised.

200

201 Returns

202 -------

203 matches : dictionary

204

205 The matching is returned as a dictionary, `matching`, such that

206 ``matching[v] == w`` if node `v` is matched to node `w`. Unmatched

207 nodes do not occur as a key in `matching`.

208

209 Raises

210 ------

211 AmbiguousSolution

212 Raised if the input bipartite graph is disconnected and no container

213 with all nodes in one bipartite set is provided. When determining

214 the nodes in each bipartite set more than one valid solution is

215 possible if the input graph is disconnected.

216

217 Notes

218 -----

219 This function is implemented with David Eppstein's version of the algorithm

220 Hopcroft--Karp algorithm (see :func:`hopcroft_karp_matching`), which

221 originally appeared in the `Python Algorithms and Data Structures library

222 (PADS) <http://www.ics.uci.edu/~eppstein/PADS/ABOUT-PADS.txt>`_.

223

224 See :mod:`bipartite documentation <networkx.algorithms.bipartite>`

225 for further details on how bipartite graphs are handled in NetworkX.

226

227 See Also

228 --------

229

230 hopcroft_karp_matching

231

232 """

233 # Due to its original implementation, a directed graph is needed

234 # so that the two sets of bipartite nodes can be distinguished

235 left, right = bipartite_sets(G, top_nodes)

236 G = nx.DiGraph(G.edges(left))

237 # initialize greedy matching (redundant, but faster than full search)

238 matching = {}

239 for u in G:

240 for v in G[u]:

241 if v not in matching:

242 matching[v] = u

243 break

244 while True:

245 # structure residual graph into layers

246 # pred[u] gives the neighbor in the previous layer for u in U

247 # preds[v] gives a list of neighbors in the previous layer for v in V

248 # unmatched gives a list of unmatched vertices in final layer of V,

249 # and is also used as a flag value for pred[u] when u is in the first

250 # layer

251 preds = {}

252 unmatched = []

253 pred = {u: unmatched for u in G}

254 for v in matching:

255 del pred[matching[v]]

256 layer = list(pred)

257

258 # repeatedly extend layering structure by another pair of layers

259 while layer and not unmatched:

260 newLayer = {}

261 for u in layer:

262 for v in G[u]:

263 if v not in preds:

264 newLayer.setdefault(v, []).append(u)

265 layer = []

266 for v in newLayer:

267 preds[v] = newLayer[v]

268 if v in matching:

269 layer.append(matching[v])

270 pred[matching[v]] = v

271 else:

272 unmatched.append(v)

273

274 # did we finish layering without finding any alternating paths?

275 if not unmatched:

276 # TODO - The lines between --- were unused and were thus commented

277 # out. This whole commented chunk should be reviewed to determine

278 # whether it should be built upon or completely removed.

279 # ---

280 # unlayered = {}

281 # for u in G:

282 # # TODO Why is extra inner loop necessary?

283 # for v in G[u]:

284 # if v not in preds:

285 # unlayered[v] = None

286 # ---

287 # TODO Originally, this function returned a three-tuple:

288 #

289 # return (matching, list(pred), list(unlayered))

290 #

291 # For some reason, the documentation for this function

292 # indicated that the second and third elements of the returned

293 # three-tuple would be the vertices in the left and right vertex

294 # sets, respectively, that are also in the maximum independent set.

295 # However, what I think the author meant was that the second

296 # element is the list of vertices that were unmatched and the third

297 # element was the list of vertices that were matched. Since that

298 # seems to be the case, they don't really need to be returned,

299 # since that information can be inferred from the matching

300 # dictionary.

301

302 # All the matched nodes must be a key in the dictionary

303 for key in matching.copy():

304 matching[matching[key]] = key

305 return matching

306

307 # recursively search backward through layers to find alternating paths

308 # recursion returns true if found path, false otherwise

309 def recurse(v):

310 if v in preds:

311 L = preds.pop(v)

312 for u in L:

313 if u in pred:

314 pu = pred.pop(u)

315 if pu is unmatched or recurse(pu):

316 matching[v] = u

317 return True

318 return False

319

320 for v in unmatched:

321 recurse(v)

322

323

324def _is_connected_by_alternating_path(G, v, matched_edges, unmatched_edges, targets):

325 """Returns True if and only if the vertex `v` is connected to one of

326 the target vertices by an alternating path in `G`.

327

328 An *alternating path* is a path in which every other edge is in the

329 specified maximum matching (and the remaining edges in the path are not in

330 the matching). An alternating path may have matched edges in the even

331 positions or in the odd positions, as long as the edges alternate between

332 'matched' and 'unmatched'.

333

334 `G` is an undirected bipartite NetworkX graph.

335

336 `v` is a vertex in `G`.

337

338 `matched_edges` is a set of edges present in a maximum matching in `G`.

339

340 `unmatched_edges` is a set of edges not present in a maximum

341 matching in `G`.

342

343 `targets` is a set of vertices.

344

345 """

346

347 def _alternating_dfs(u, along_matched=True):

348 """Returns True if and only if `u` is connected to one of the

349 targets by an alternating path.

350

351 `u` is a vertex in the graph `G`.

352

353 If `along_matched` is True, this step of the depth-first search

354 will continue only through edges in the given matching. Otherwise, it

355 will continue only through edges *not* in the given matching.

356

357 """

358 visited = set()

359 # Follow matched edges when depth is even,

360 # and follow unmatched edges when depth is odd.

361 initial_depth = 0 if along_matched else 1

362 stack = [(u, iter(G[u]), initial_depth)]

363 while stack:

364 parent, children, depth = stack[-1]

365 valid_edges = matched_edges if depth % 2 else unmatched_edges

366 try:

367 child = next(children)

368 if child not in visited:

369 if (parent, child) in valid_edges or (child, parent) in valid_edges:

370 if child in targets:

371 return True

372 visited.add(child)

373 stack.append((child, iter(G[child]), depth + 1))

374 except StopIteration:

375 stack.pop()

376 return False

377

378 # Check for alternating paths starting with edges in the matching, then

379 # check for alternating paths starting with edges not in the

380 # matching.

381 return _alternating_dfs(v, along_matched=True) or _alternating_dfs(

382 v, along_matched=False

383 )

384

385

386def _connected_by_alternating_paths(G, matching, targets):

387 """Returns the set of vertices that are connected to one of the target

388 vertices by an alternating path in `G` or are themselves a target.

389

390 An *alternating path* is a path in which every other edge is in the

391 specified maximum matching (and the remaining edges in the path are not in

392 the matching). An alternating path may have matched edges in the even

393 positions or in the odd positions, as long as the edges alternate between

394 'matched' and 'unmatched'.

395

396 `G` is an undirected bipartite NetworkX graph.

397

398 `matching` is a dictionary representing a maximum matching in `G`, as

399 returned by, for example, :func:`maximum_matching`.

400

401 `targets` is a set of vertices.

402

403 """

404 # Get the set of matched edges and the set of unmatched edges. Only include

405 # one version of each undirected edge (for example, include edge (1, 2) but

406 # not edge (2, 1)). Using frozensets as an intermediary step we do not

407 # require nodes to be orderable.

408 edge_sets = {frozenset((u, v)) for u, v in matching.items()}

409 matched_edges = {tuple(edge) for edge in edge_sets}

410 unmatched_edges = {

411 (u, v) for (u, v) in G.edges() if frozenset((u, v)) not in edge_sets

412 }

413

414 return {

415 v

416 for v in G

417 if v in targets

418 or _is_connected_by_alternating_path(

419 G, v, matched_edges, unmatched_edges, targets

420 )

421 }

422

423

424@nx._dispatchable

425def to_vertex_cover(G, matching, top_nodes=None):

426 """Returns the minimum vertex cover corresponding to the given maximum

427 matching of the bipartite graph `G`.

428

429 Parameters

430 ----------

431 G : NetworkX graph

432

433 Undirected bipartite graph

434

435 matching : dictionary

436

437 A dictionary whose keys are vertices in `G` and whose values are the

438 distinct neighbors comprising the maximum matching for `G`, as returned

439 by, for example, :func:`maximum_matching`. The dictionary *must*

440 represent the maximum matching.

441

442 top_nodes : container

443

444 Container with all nodes in one bipartite node set. If not supplied

445 it will be computed. But if more than one solution exists an exception

446 will be raised.

447

448 Returns

449 -------

450 vertex_cover : :class:`set`

451

452 The minimum vertex cover in `G`.

453

454 Raises

455 ------

456 AmbiguousSolution

457 Raised if the input bipartite graph is disconnected and no container

458 with all nodes in one bipartite set is provided. When determining

459 the nodes in each bipartite set more than one valid solution is

460 possible if the input graph is disconnected.

461

462 Notes

463 -----

464 This function is implemented using the procedure guaranteed by `Konig's

465 theorem

466 <https://en.wikipedia.org/wiki/K%C3%B6nig%27s_theorem_%28graph_theory%29>`_,

467 which proves an equivalence between a maximum matching and a minimum vertex

468 cover in bipartite graphs.

469

470 Since a minimum vertex cover is the complement of a maximum independent set

471 for any graph, one can compute the maximum independent set of a bipartite

472 graph this way:

473

474 >>> G = nx.complete_bipartite_graph(2, 3)

475 >>> matching = nx.bipartite.maximum_matching(G)

476 >>> vertex_cover = nx.bipartite.to_vertex_cover(G, matching)

477 >>> independent_set = set(G) - vertex_cover

478 >>> print(list(independent_set))

479 [2, 3, 4]

480

481 See :mod:`bipartite documentation <networkx.algorithms.bipartite>`

482 for further details on how bipartite graphs are handled in NetworkX.

483

484 """

485 # This is a Python implementation of the algorithm described at

486 # <https://en.wikipedia.org/wiki/K%C3%B6nig%27s_theorem_%28graph_theory%29#Proof>.

487 L, R = bipartite_sets(G, top_nodes)

488 # Let U be the set of unmatched vertices in the left vertex set.

489 unmatched_vertices = set(G) - set(matching)

490 U = unmatched_vertices & L

491 # Let Z be the set of vertices that are either in U or are connected to U

492 # by alternating paths.

493 Z = _connected_by_alternating_paths(G, matching, U)

494 # At this point, every edge either has a right endpoint in Z or a left

495 # endpoint not in Z. This gives us the vertex cover.

496 return (L - Z) | (R & Z)

497

498

499#: Returns the maximum cardinality matching in the given bipartite graph.

500#:

501#: This function is simply an alias for :func:`hopcroft_karp_matching`.

502maximum_matching = hopcroft_karp_matching

503

504

505@nx._dispatchable(edge_attrs="weight")

506def minimum_weight_full_matching(G, top_nodes=None, weight="weight"):

507 r"""Returns a minimum weight full matching of the bipartite graph `G`.

508

509 Let :math:`G = ((U, V), E)` be a weighted bipartite graph with real weights

510 :math:`w : E \to \mathbb{R}`. This function then produces a matching

511 :math:`M \subseteq E` with cardinality

512

513 .. math::

514 \lvert M \rvert = \min(\lvert U \rvert, \lvert V \rvert),

515

516 which minimizes the sum of the weights of the edges included in the

517 matching, :math:`\sum_{e \in M} w(e)`, or raises an error if no such

518 matching exists.

519

520 When :math:`\lvert U \rvert = \lvert V \rvert`, this is commonly

521 referred to as a perfect matching; here, since we allow

522 :math:`\lvert U \rvert` and :math:`\lvert V \rvert` to differ, we

523 follow Karp [1]_ and refer to the matching as *full*.

524

525 Parameters

526 ----------

527 G : NetworkX graph

528

529 Undirected bipartite graph

530

531 top_nodes : container

532

533 Container with all nodes in one bipartite node set. If not supplied

534 it will be computed.

535

536 weight : string, optional (default='weight')

537

538 The edge data key used to provide each value in the matrix.

539 If None, then each edge has weight 1.

540

541 Returns

542 -------

543 matches : dictionary

544

545 The matching is returned as a dictionary, `matches`, such that

546 ``matches[v] == w`` if node `v` is matched to node `w`. Unmatched

547 nodes do not occur as a key in `matches`.

548

549 Raises

550 ------

551 ValueError

552 Raised if no full matching exists.

553

554 ImportError

555 Raised if SciPy is not available.

556

557 Notes

558 -----

559 The problem of determining a minimum weight full matching is also known as

560 the rectangular linear assignment problem. This implementation defers the

561 calculation of the assignment to SciPy.

562

563 References

564 ----------

565 .. [1] Richard Manning Karp:

566 An algorithm to Solve the m x n Assignment Problem in Expected Time

567 O(mn log n).

568 Networks, 10(2):143–152, 1980.

569

570 """

571 import numpy as np

572 import scipy as sp

573

574 left, right = nx.bipartite.sets(G, top_nodes)

575 U = list(left)

576 V = list(right)

577 # We explicitly create the biadjacency matrix having infinities

578 # where edges are missing (as opposed to zeros, which is what one would

579 # get by using toarray on the sparse matrix).

580 weights_sparse = biadjacency_matrix(

581 G, row_order=U, column_order=V, weight=weight, format="coo"

582 )

583 weights = np.full(weights_sparse.shape, np.inf)

584 weights[weights_sparse.row, weights_sparse.col] = weights_sparse.data

585 left_matches = sp.optimize.linear_sum_assignment(weights)

586 d = {U[u]: V[v] for u, v in zip(*left_matches)}

587 # d will contain the matching from edges in left to right; we need to

588 # add the ones from right to left as well.

589 d.update({v: u for u, v in d.items()})

590 return d