1"""
2*****
3Pajek
4*****
Read and write graphs in Pajek format.
6
7This implementation handles directed and undirected graphs including
8those with self loops and parallel edges.
9
10Format
11------
12See http://vlado.fmf.uni-lj.si/pub/networks/pajek/doc/draweps.htm
13for format information.
14
15"""
16
17import warnings
18
19import networkx as nx
20from networkx.utils import open_file
21
22__all__ = ["read_pajek", "parse_pajek", "generate_pajek", "write_pajek"]
23
24
def generate_pajek(G):
    """Generate lines in Pajek graph format.

    Parameters
    ----------
    G : graph
        A Networkx graph

    Yields
    ------
    line : str
        One line of Pajek data at a time, without a trailing newline: the
        ``*vertices`` header, one line per node, the ``*arcs`` (directed)
        or ``*edges`` (undirected) header, then one line per edge.

    Raises
    ------
    ValueError, TypeError
        If a node's ``id`` attribute cannot be converted with ``int()``.

    Warns
    -----
    UserWarning
        For every optional node or edge attribute whose value is not a
        non-empty string. Such attributes are left out of the output.

    References
    ----------
    See http://vlado.fmf.uni-lj.si/pub/networks/pajek/doc/draweps.htm
    for format information.
    """
    # No "*network <name>" header is emitted: apparently many Pajek format
    # readers can't process that line.

    # write nodes with attributes
    yield f"*vertices {G.order()}"
    nodes = list(G)
    # Map every node to its 1-based position; an explicit "id" attribute
    # overrides this default below.
    nodenumber = {node: number for number, node in enumerate(nodes, start=1)}
    for n in nodes:
        # copy node attributes and pop mandatory attributes
        # to avoid duplication.
        na = G.nodes.get(n, {}).copy()
        x = na.pop("x", 0.0)
        y = na.pop("y", 0.0)
        try:
            node_id = int(na.pop("id", nodenumber[n]))
        except (ValueError, TypeError) as err:
            # Re-raise the same exception, with a hint appended to its args.
            err.args += (
                (
                    "Pajek format requires 'id' to be an int()."
                    " Refer to the 'Relabeling nodes' section."
                ),
            )
            raise
        # Edges below must refer to the id that was actually written.
        nodenumber[n] = node_id
        shape = na.pop("shape", "ellipse")
        s = " ".join(map(make_qstr, (node_id, n, x, y, shape)))
        # only optional attributes are left in na.
        for key, val in na.items():
            if isinstance(val, str) and val.strip() != "":
                s += f" {make_qstr(key)} {make_qstr(val)}"
            else:
                warnings.warn(
                    f"Node attribute {key} is not processed. {('Empty attribute' if isinstance(val, str) else 'Non-string attribute')}."
                )
        yield s

    # write edges with attributes
    if G.is_directed():
        yield "*arcs"
    else:
        yield "*edges"
    for u, v, edgedata in G.edges(data=True):
        d = edgedata.copy()
        value = d.pop("weight", 1.0)  # use 1 as default edge value
        s = " ".join(map(make_qstr, (nodenumber[u], nodenumber[v], value)))
        # Separate loop names so the edge endpoint ``v`` is not clobbered.
        for key, val in d.items():
            if isinstance(val, str) and val.strip() != "":
                s += f" {make_qstr(key)} {make_qstr(val)}"
            else:
                warnings.warn(
                    f"Edge attribute {key} is not processed. {('Empty attribute' if isinstance(val, str) else 'Non-string attribute')}."
                )
        yield s
97
98
@open_file(1, mode="wb")
def write_pajek(G, path, encoding="UTF-8"):
    """Write graph in Pajek format to path.

    Parameters
    ----------
    G : graph
        A Networkx graph
    path : file or string
        File or filename to write.
        Filenames ending in .gz or .bz2 will be compressed.
    encoding : string, optional (default="UTF-8")
        Encoding applied to each generated line before it is written.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> nx.write_pajek(G, "test.netP4")

    Warnings
    --------
    Optional node attributes and edge attributes must be non-empty strings.
    Otherwise it will not be written into the file. You will need to
    convert those attributes to strings if you want to keep them.

    References
    ----------
    See http://vlado.fmf.uni-lj.si/pub/networks/pajek/doc/draweps.htm
    for format information.
    """
    # The handle is opened in binary mode, so every line is terminated and
    # encoded explicitly before being written.
    for pajek_line in generate_pajek(G):
        encoded = f"{pajek_line}\n".encode(encoding)
        path.write(encoded)
130
131
@open_file(0, mode="rb")
@nx._dispatchable(graphs=None, returns_graph=True)
def read_pajek(path, encoding="UTF-8"):
    """Read graph in Pajek format from path.

    Parameters
    ----------
    path : file or string
        Filename or file handle to read.
        Filenames ending in .gz or .bz2 will be decompressed.
    encoding : string, optional (default="UTF-8")
        Encoding used to decode each line read from `path`.

    Returns
    -------
    G : NetworkX MultiGraph or MultiDiGraph.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> nx.write_pajek(G, "test.net")
    >>> G = nx.read_pajek("test.net")

    To create a Graph instead of a MultiGraph use

    >>> G1 = nx.Graph(G)

    References
    ----------
    See http://vlado.fmf.uni-lj.si/pub/networks/pajek/doc/draweps.htm
    for format information.
    """
    # The handle is opened in binary mode; decode lazily, line by line, and
    # hand the text lines to the parser.
    return parse_pajek(map(lambda raw: raw.decode(encoding), path))
164
165
@nx._dispatchable(graphs=None, returns_graph=True)
def parse_pajek(lines):
    """Parse Pajek format graph from string or iterable.

    Parameters
    ----------
    lines : string or iterable
        Data in Pajek format. A single string is split on newlines;
        any other iterable is expected to yield one text line at a time.

    Returns
    -------
    G : NetworkX graph
        A MultiDiGraph by default and for ``*arcs`` data, a MultiGraph for
        ``*edges`` data, and a DiGraph for ``*matrix`` data.

    Notes
    -----
    Edge endpoints that do not match any vertex id declared in a
    ``*vertices`` section are used as node names unchanged.

    See Also
    --------
    read_pajek

    """
    import shlex

    if isinstance(lines, str):
        lines = iter(lines.split("\n"))
    lines = iter([line.rstrip("\n") for line in lines])
    G = nx.MultiDiGraph()  # are multiedges allowed in Pajek? assume yes
    labels = []  # in the order of the file, needed for matrix
    # Pajek vertex id -> node label. Defined up front so that an edge section
    # without a preceding *vertices section falls back to the raw ids.
    nodelabels = {}
    # The section bodies below pull further lines from this same iterator.
    for l in lines:
        header = l.lower()
        if header.startswith("*network"):
            try:
                _, name = l.split(None, 1)
            except ValueError:
                # Line was not of the form: *network NAME
                pass
            else:
                G.graph["name"] = name
        elif header.startswith("*vertices"):
            nodelabels = {}
            _, nnodes = l.split()
            for _ in range(int(nnodes)):
                splitline = shlex.split(next(lines))
                node_id, label = splitline[0:2]
                labels.append(label)
                G.add_node(label)
                nodelabels[node_id] = label
                G.nodes[label]["id"] = node_id
                try:
                    x, y, shape = splitline[2:5]
                    G.nodes[label].update(
                        {"x": float(x), "y": float(y), "shape": shape}
                    )
                except ValueError:
                    # Fewer than three positional fields, or non-numeric
                    # coordinates: leave x, y and shape unset.
                    pass
                extra_attr = zip(splitline[5::2], splitline[6::2])
                G.nodes[label].update(extra_attr)
        elif header.startswith("*edges") or header.startswith("*arcs"):
            if header.startswith("*edge"):
                # switch from multidigraph to multigraph
                G = nx.MultiGraph(G)
            if header.startswith("*arcs"):
                # switch to directed with multiple arcs for each existing edge
                G = G.to_directed()
            # NOTE: this loop consumes every remaining line, so a later
            # section header is treated as an edge line (and skipped when it
            # has fewer than two tokens).
            for edge_line in lines:
                splitline = shlex.split(edge_line)
                if len(splitline) < 2:
                    continue
                ui, vi = splitline[0:2]
                u = nodelabels.get(ui, ui)
                v = nodelabels.get(vi, vi)
                # parse the data attached to this edge and put in a dictionary
                edge_data = {}
                if len(splitline) > 2:
                    try:
                        edge_data["weight"] = float(splitline[2])
                    except ValueError:
                        # Third field is not numeric: store no weight.
                        pass
                extra_attr = zip(splitline[3::2], splitline[4::2])
                edge_data.update(extra_attr)
                G.add_edge(u, v, **edge_data)
        elif header.startswith("*matrix"):
            G = nx.DiGraph(G)
            # Row/column positions index into ``labels``; zero entries
            # produce no edge.
            adj_list = (
                (labels[row], labels[col], {"weight": int(data)})
                for (row, line) in enumerate(lines)
                for (col, data) in enumerate(line.split())
                if int(data) != 0
            )
            G.add_edges_from(adj_list)

    return G
276
277
def make_qstr(t):
    """Return `t` as a string, double-quoted when it contains a space.

    Non-string values are converted with ``str()`` first. The result is
    wrapped in outer double quotes only if it contains a space character.
    """
    text = t if isinstance(t, str) else str(t)
    return f'"{text}"' if " " in text else text