1"""Session object for building, serializing, sending, and receiving messages.
2
3The Session object supports serialization, HMAC signatures,
4and metadata on messages.
5
6Also defined here are utilities for working with Sessions:
7* A SessionFactory to be used as a base class for configurables that work with
8Sessions.
9* A Message object for convenience that allows attribute-access to the msg dict.
10"""
11
12# Copyright (c) Jupyter Development Team.
13# Distributed under the terms of the Modified BSD License.
14from __future__ import annotations
15
16import functools
17import hashlib
18import hmac
19import json
20import logging
21import os
22import pickle
23import pprint
24import random
25import typing as t
26import warnings
27from binascii import b2a_hex
28from datetime import datetime, timezone
29from hmac import compare_digest
30
31# We are using compare_digest to limit the surface of timing attacks
32import zmq.asyncio
33from tornado.ioloop import IOLoop
34from traitlets import (
35 Any,
36 Bool,
37 Callable,
38 CBytes,
39 CUnicode,
40 Dict,
41 DottedObjectName,
42 Instance,
43 Integer,
44 Set,
45 TraitError,
46 Unicode,
47 observe,
48)
49from traitlets.config.configurable import Configurable, LoggingConfigurable
50from traitlets.log import get_logger
51from traitlets.utils.importstring import import_item
52from zmq.eventloop.zmqstream import ZMQStream
53
54from ._version import protocol_version
55from .adapter import adapt
56from .jsonutil import extract_dates, json_clean, json_default, squash_dates
57
# Pickle protocol version handed to pickle.dumps() by pickle_packer.
PICKLE_PROTOCOL = pickle.DEFAULT_PROTOCOL

# Module-level shorthand for the UTC timezone (used by utcnow()).
utc = timezone.utc
61
62# -----------------------------------------------------------------------------
63# utility functions
64# -----------------------------------------------------------------------------
65
66
def squash_unicode(obj: t.Any) -> t.Any:
    """Recursively encode every ``str`` found in *obj* to UTF-8 ``bytes``.

    Dicts and lists are modified in place (and returned); dict keys that
    are strings are replaced by their encoded form.  Any other object is
    returned untouched.
    """
    if isinstance(obj, dict):
        # Iterate over a snapshot: keys are replaced while we walk.
        for key in tuple(obj):
            squashed = squash_unicode(obj[key])
            if isinstance(key, str):
                # Swap the text key for its bytes equivalent.
                del obj[key]
                obj[key.encode("utf8")] = squashed
            else:
                obj[key] = squashed
        return obj
    if isinstance(obj, list):
        # Slice assignment keeps the caller's list object.
        obj[:] = [squash_unicode(item) for item in obj]
        return obj
    if isinstance(obj, str):
        return obj.encode("utf8")
    return obj
80
81
82# -----------------------------------------------------------------------------
83# globals and defaults
84# -----------------------------------------------------------------------------
85
# default values for the thresholds:
# (MAX_ITEMS is a container item count, MAX_BYTES is a size in bytes)
MAX_ITEMS = 64
MAX_BYTES = 1024

# Notes on the JSON packer defined below:
# - ISO8601-ify datetime objects
# - allow unicode (ensure_ascii=False)
# - disallow nan, because it's not actually valid JSON (allow_nan=False)
93
94
def json_packer(obj: t.Any) -> bytes:
    """Serialize *obj* to UTF-8 encoded JSON bytes.

    The object is first serialized as-is.  If that raises ``TypeError`` or
    ``ValueError`` the object is passed through ``json_clean`` and
    serialized again; when the retry succeeds a warning is emitted because
    supporting such messages is deprecated.
    """

    def _encode(payload: t.Any) -> bytes:
        # Strict JSON (no NaN), unicode kept as-is, surrogates escaped on encode.
        text = json.dumps(
            payload,
            default=json_default,
            ensure_ascii=False,
            allow_nan=False,
        )
        return text.encode("utf8", errors="surrogateescape")

    try:
        return _encode(obj)
    except (TypeError, ValueError) as e:
        # Fallback to trying to clean the json before serializing
        packed = _encode(json_clean(obj))

        warnings.warn(
            f"Message serialization failed with:\n{e}\n"
            "Supporting this message is deprecated in jupyter-client 7, please make "
            "sure your message is JSON-compliant",
            stacklevel=2,
        )

        return packed
121
122
def json_unpacker(s: str | bytes) -> t.Any:
    """Parse JSON given as text or as UTF-8 bytes and return the object.

    Bytes are decoded as UTF-8 with undecodable sequences replaced rather
    than raising.
    """
    text = s.decode("utf8", "replace") if isinstance(s, bytes) else s
    return json.loads(text)
128
129
try:
    import orjson
except ModuleNotFoundError:
    # orjson is optional: expose the stdlib-json implementations under the
    # orjson names so callers never have to branch on availability.
    has_orjson = False
    orjson_packer = json_packer
    orjson_unpacker = json_unpacker
else:
    has_orjson = True

    def orjson_packer(
        obj: t.Any, *, option: int | None = orjson.OPT_NAIVE_UTC | orjson.OPT_UTC_Z
    ) -> bytes:
        """Serialize *obj* to JSON bytes with orjson; on any error use json_packer."""
        try:
            packed = orjson.dumps(obj, default=json_default, option=option)
        except Exception:
            # Anything orjson rejects gets a second chance with the stdlib packer.
            packed = json_packer(obj)
        return packed

    def orjson_unpacker(s: str | bytes) -> t.Any:
        """Parse JSON text/bytes with orjson; on any error use json_unpacker."""
        try:
            unpacked = orjson.loads(s)
        except Exception:
            unpacked = json_unpacker(s)
        return unpacked
153
154
# msgpack is an optional dependency.  NOTE: msgpack_packer / msgpack_unpacker
# are only defined when the import succeeds, so check has_msgpack before
# referencing them.
try:
    import msgpack
except ModuleNotFoundError:
    has_msgpack = False
else:
    has_msgpack = True
    # json_default is used as the fallback hook for objects msgpack cannot pack.
    msgpack_packer = functools.partial(msgpack.packb, default=json_default)
    msgpack_unpacker = msgpack.unpackb
163
164
def pickle_packer(o: t.Any) -> bytes:
    """Pickle *o* (after passing it through ``squash_dates``) with PICKLE_PROTOCOL."""
    squashed = squash_dates(o)
    return pickle.dumps(squashed, protocol=PICKLE_PROTOCOL)
168
169
# SECURITY NOTE(review): pickle.loads executes arbitrary code embedded in the
# payload.  Only use the pickle unpacker for messages from a trusted peer.
pickle_unpacker = pickle.loads


# Frame that separates the routing identities from the signed message parts.
DELIM = b"<IDS|MSG>"
# singleton dummy tracker, which will always report as done
DONE = zmq.MessageTracker()
176
177# -----------------------------------------------------------------------------
178# Mixin tools for apps that use Sessions
179# -----------------------------------------------------------------------------
180
181
def new_id() -> str:
    """Generate a new random id.

    Avoids problematic runtime import in stdlib uuid on Python 2.

    Returns
    -------

    id string: 16 bytes from ``os.urandom`` rendered as lowercase hex, the
    first 4 bytes and the remaining 12 bytes joined by a single '-'
    """
    raw = os.urandom(16)
    head, tail = raw[:4], raw[4:]
    return f"{head.hex()}-{tail.hex()}"
194
195
def new_id_bytes() -> bytes:
    """Return a fresh ``new_id()`` value encoded as ASCII bytes."""
    return bytes(new_id(), "ascii")
199
200
# Command-line aliases: short option name -> "Class.trait" it configures.
session_aliases = {
    "ident": "Session.session",
    "user": "Session.username",
    "keyfile": "Session.keyfile",
}

# Command-line flags: flag name -> (config dict to apply, help text).
# NOTE: new_id_bytes() is evaluated once, when this dict literal is built,
# so the "secure" key is generated at module import time.
session_flags = {
    "secure": (
        {"Session": {"key": new_id_bytes(), "keyfile": ""}},
        """Use HMAC digests for authentication of messages.
        Setting this flag will generate a new UUID to use as the HMAC key.
        """,
    ),
    "no-secure": (
        {"Session": {"key": b"", "keyfile": ""}},
        """Don't authenticate messages.""",
    ),
}
219
220
def default_secure(cfg: t.Any) -> None:  # pragma: no cover
    """Make a config object secure by default (deprecated).

    Emits a DeprecationWarning.  When neither ``Session.key`` nor
    ``Session.keyfile`` is present in *cfg*, ``cfg.Session.key`` is set to
    a newly generated random id; otherwise *cfg* is left alone.
    """
    warnings.warn("default_secure is deprecated", DeprecationWarning, stacklevel=2)
    has_session = "Session" in cfg
    if has_session and any(name in cfg.Session for name in ("key", "keyfile")):
        # The user already chose a key or a keyfile; respect it.
        return
    # key/keyfile not specified, generate new UUID:
    cfg.Session.key = new_id_bytes()
232
233
def utcnow() -> datetime:
    """Return the current time as a timezone-aware datetime in UTC."""
    return datetime.now(tz=utc)
237
238
239# -----------------------------------------------------------------------------
240# Classes
241# -----------------------------------------------------------------------------
242
243
class SessionFactory(LoggingConfigurable):
    """The Base class for configurables that have a Session, Context, logger,
    and IOLoop.

    If no ``session`` is supplied at construction time, one is created in
    ``__init__`` from the same keyword arguments.
    """

    # Name of the logger to use; changing it replaces ``self.log``.
    logname = Unicode("")

    @observe("logname")
    def _logname_changed(self, change: t.Any) -> None:
        """Rebind ``self.log`` to the logger named by the new ``logname``."""
        self.log = logging.getLogger(change["new"])

    # not configurable:
    context = Instance("zmq.Context")

    def _context_default(self) -> zmq.Context:
        """Default ``context``: a newly constructed ``zmq.Context()``."""
        return zmq.Context()

    # The Session used for messaging; None until set or built in __init__.
    session = Instance("jupyter_client.session.Session", allow_none=True)

    loop = Instance("tornado.ioloop.IOLoop")

    def _loop_default(self) -> IOLoop:
        """Default ``loop``: whatever ``IOLoop.current()`` returns."""
        return IOLoop.current()

    def __init__(self, **kwargs: t.Any) -> None:
        """Initialize a session factory.

        All keyword arguments go to the parent constructor; if that leaves
        ``session`` unset, the same keyword arguments are used to build a
        new ``Session``.
        """
        super().__init__(**kwargs)

        if self.session is None:
            # construct the session
            self.session = Session(**kwargs)
275
276
class Message:
    """Attribute-style wrapper around a message dict.

    Every key of the source dict becomes an instance attribute; values that
    are themselves dicts are wrapped in ``Message`` recursively.  Calling
    ``dict(msg_obj)`` yields the top-level attributes back as a dict.
    """

    def __init__(self, msg_dict: dict[str, t.Any]) -> None:
        """Populate attributes from *msg_dict* (anything ``dict()`` accepts)."""
        attrs = self.__dict__
        for key, value in dict(msg_dict).items():
            # Nested dicts get the same attribute access treatment.
            attrs[key] = Message(value) if isinstance(value, dict) else value

    # Having this iterator lets dict(msg_obj) work out of the box.
    def __iter__(self) -> t.ItemsView[str, t.Any]:
        pairs = self.__dict__.items()
        return iter(pairs)  # type:ignore[return-value]

    def __repr__(self) -> str:
        return repr(self.__dict__)

    def __str__(self) -> str:
        return pprint.pformat(self.__dict__)

    def __contains__(self, k: object) -> bool:
        return k in self.__dict__

    def __getitem__(self, k: str) -> t.Any:
        return self.__dict__[k]
306
307
def msg_header(
    msg_id: str, msg_type: str, username: str, session: Session | str
) -> dict[str, t.Any]:
    """Build a message header dict.

    The result holds the four arguments under their own names plus
    ``date`` (the current UTC time) and ``version`` (the protocol version).
    """
    return {
        "msg_id": msg_id,
        "msg_type": msg_type,
        "username": username,
        "session": session,
        "date": utcnow(),
        "version": protocol_version,
    }
315
316
def extract_header(msg_or_header: dict[str, t.Any]) -> dict[str, t.Any]:
    """Return the header of *msg_or_header* as a plain dict.

    A falsy argument yields ``{}``.  If the argument has a ``"header"``
    entry, that entry is the header; otherwise the argument itself must
    have a ``"msg_id"`` entry and is taken to be the header.

    Raises
    ------
    KeyError
        If the argument has neither a ``"header"`` nor a ``"msg_id"`` entry.
    """
    if not msg_or_header:
        return {}
    try:
        # See if msg_or_header is the entire message.
        header = msg_or_header["header"]
    except KeyError:
        # See if msg_or_header is just the header; the lookup is done only
        # for its KeyError when "msg_id" is missing.
        msg_or_header["msg_id"]
        header = msg_or_header
    return header if isinstance(header, dict) else dict(header)
335
336
class Session(Configurable):
    """Object for handling serialization and sending of messages.

    The Session object handles building messages and sending them
    with ZMQ sockets or ZMQStream objects.  Objects can communicate with each
    other over the network via Session objects, and only need to work with the
    dict-based IPython message spec. The Session will handle
    serialization/deserialization, security, and metadata.

    Sessions support configurable serialization via packer/unpacker traits,
    and signing with HMAC digests via the key/keyfile traits.

    Parameters
    ----------

    debug : bool
        whether to trigger extra debugging statements
    packer/unpacker : str : 'orjson', 'json', 'pickle', 'msgpack' or import_string
        importstrings for methods to serialize message parts.  If just
        one of the predefined names is given, the matching predefined
        packer/unpacker pair is used.
        Otherwise, the entire importstring must be used.

        The functions must accept at least valid JSON input, and output *bytes*.

        For example, to use msgpack:
        packer = 'msgpack.packb', unpacker='msgpack.unpackb'
    pack/unpack : callables
        You can also set the pack/unpack callables for serialization directly.
    session : str (must be ascii)
        the ID of this Session object.  The default is to generate a new UUID.
    username : unicode
        username added to message headers.  The default is taken from the
        ``USER`` environment variable.
    key : bytes
        The key used to initialize an HMAC signature.  If unset, messages
        will not be signed or checked.
    keyfile : filepath
        The file containing a key.  If this is set, `key` will be initialized
        to the contents of the file.

    """

    debug = Bool(False, config=True, help="""Debug output in the Session""")

    check_pid = Bool(
        True,
        config=True,
        help="""Whether to check PID to protect against calls after fork.

        This check can be disabled if fork-safety is handled elsewhere.
        """,
    )

    # serialization traits:
    packer = DottedObjectName(
        "orjson" if has_orjson else "json",
        config=True,
        help="""The name of the packer for serializing messages.
            Should be one of 'json', 'pickle', or an import name
            for a custom callable serializer.""",
    )
    unpacker = DottedObjectName(
        "orjson" if has_orjson else "json",
        config=True,
        help="""The name of the unpacker for unserializing messages.
        Only used with custom functions for `packer`.""",
    )
    pack = Callable(orjson_packer if has_orjson else json_packer)  # the actual packer function
    unpack = Callable(
        orjson_unpacker if has_orjson else json_unpacker
    )  # the actual unpacker function

    @observe("packer", "unpacker")
    def _packer_unpacker_changed(self, change: t.Any) -> None:
        """Keep ``pack``/``unpack`` in sync with the ``packer``/``unpacker`` names.

        A predefined name ('orjson', 'json', 'pickle', 'msgpack') selects the
        matching pair and sets both name traits to that value.  Any other
        value is imported as a dotted name and only updates the callable for
        the trait that changed.
        """
        new = change["new"].lower()
        if new == "orjson" and has_orjson:
            self.pack, self.unpack = orjson_packer, orjson_unpacker
        elif new == "json" or new == "orjson":
            # 'orjson' requested but not installed: use the stdlib json pair.
            self.pack, self.unpack = json_packer, json_unpacker
        elif new == "pickle":
            self.pack, self.unpack = pickle_packer, pickle_unpacker
        elif new == "msgpack" and has_msgpack:
            self.pack, self.unpack = msgpack_packer, msgpack_unpacker
        else:
            obj = import_item(str(change["new"]))
            name = "pack" if change["name"] == "packer" else "unpack"
            self.set_trait(name, obj)
            return
        self.packer = self.unpacker = change["new"]

    session = CUnicode("", config=True, help="""The UUID identifying this session.""")

    def _session_default(self) -> str:
        """Generate the default session id and set ``bsession`` to match."""
        u = new_id()
        self.bsession = u.encode("ascii")
        return u

    @observe("session")
    def _session_changed(self, change: t.Any) -> None:
        """Re-derive ``bsession`` whenever ``session`` changes."""
        self.bsession = self.session.encode("ascii")

    # bsession is the session as bytes
    bsession = CBytes(b"")

    username = Unicode(
        os.environ.get("USER", "username"),
        help="""Username for the Session. Default is your system username.""",
        config=True,
    )

    metadata = Dict(
        {},
        config=True,
        help="Metadata dictionary, which serves as the default top-level metadata dict for each message.",
    )

    # if 0, no adapting to do.
    adapt_version = Integer(0)

    # message signature related traits:

    key = CBytes(config=True, help="""execution key, for signing messages.""")

    def _key_default(self) -> bytes:
        """Default signing key: a newly generated random id."""
        return new_id_bytes()

    @observe("key")
    def _key_changed(self, change: t.Any) -> None:
        """Rebuild the HMAC object when the key changes."""
        self._new_auth()

    signature_scheme = Unicode(
        "hmac-sha256",
        config=True,
        help="""The digest scheme used to construct the message signatures.
        Must have the form 'hmac-HASH'.""",
    )

    @observe("signature_scheme")
    def _signature_scheme_changed(self, change: t.Any) -> None:
        """Resolve the hash named in 'hmac-HASH' from hashlib and rebuild the HMAC.

        Raises
        ------
        TraitError
            If the value does not start with 'hmac-' or hashlib has no
            attribute with the given hash name.
        """
        new = change["new"]
        if not new.startswith("hmac-"):
            raise TraitError("signature_scheme must start with 'hmac-', got %r" % new)
        hash_name = new.split("-", 1)[1]
        try:
            self.digest_mod = getattr(hashlib, hash_name)
        except AttributeError as e:
            raise TraitError("hashlib has no such attribute: %s" % hash_name) from e
        self._new_auth()

    digest_mod = Any()

    def _digest_mod_default(self) -> t.Callable:
        """Default digest constructor: ``hashlib.sha256``."""
        return hashlib.sha256

    auth = Instance(hmac.HMAC, allow_none=True)

    def _new_auth(self) -> None:
        """Create the template HMAC from ``key`` and ``digest_mod``.

        ``auth`` is set to None when the key is empty, which disables signing.
        """
        if self.key:
            self.auth = hmac.HMAC(self.key, digestmod=self.digest_mod)
        else:
            self.auth = None

    # Signatures already seen by deserialize(); used to reject replays.
    digest_history = Set()
    digest_history_size = Integer(
        2**16,
        config=True,
        help="""The maximum number of digests to remember.

        The digest history will be culled when it exceeds this value.
        """,
    )

    keyfile = Unicode("", config=True, help="""path to file containing execution key.""")

    @observe("keyfile")
    def _keyfile_changed(self, change: t.Any) -> None:
        """Load ``key`` from the file named by the new ``keyfile`` value."""
        path = change["new"]
        if not path:
            # An empty path means "no keyfile" (e.g. the trait was reset);
            # there is nothing to read and open("") would only raise.
            return
        with open(path, "rb") as f:
            self.key = f.read().strip()

    # for protecting against sends from forks
    pid = Integer()

    # thresholds:
    copy_threshold = Integer(
        2**16,
        config=True,
        help="Threshold (in bytes) beyond which a buffer should be sent without copying.",
    )
    buffer_threshold = Integer(
        MAX_BYTES,
        config=True,
        help="Threshold (in bytes) beyond which an object's buffer should be extracted to avoid pickling.",
    )
    item_threshold = Integer(
        MAX_ITEMS,
        config=True,
        help="""The maximum number of items for a container to be introspected for custom serialization.
        Containers larger than this are pickled outright.
        """,
    )

    def __init__(self, **kwargs: t.Any) -> None:
        """create a Session object

        Parameters
        ----------

        debug : bool
            whether to trigger extra debugging statements
        packer/unpacker : str : 'orjson', 'json', 'pickle', 'msgpack' or import_string
            importstrings for methods to serialize message parts.  If just
            one of the predefined names is given, the matching predefined
            packer/unpacker pair is used.
            Otherwise, the entire importstring must be used.

            The functions must accept at least valid JSON input, and output
            *bytes*.

            For example, to use msgpack:
            packer = 'msgpack.packb', unpacker='msgpack.unpackb'
        pack/unpack : callables
            You can also set the pack/unpack callables for serialization
            directly.
        session : unicode (must be ascii)
            the ID of this Session object.  The default is to generate a new
            UUID.
        bsession : bytes
            The session as bytes
        username : unicode
            username added to message headers.  The default is taken from
            the ``USER`` environment variable.
        key : bytes
            The key used to initialize an HMAC signature.  If unset, messages
            will not be signed or checked.
        signature_scheme : str
            The message digest scheme. Currently must be of the form 'hmac-HASH',
            where 'HASH' is a hashing function available in Python's hashlib.
            The default is 'hmac-sha256'.
            This is ignored if 'key' is empty.
        keyfile : filepath
            The file containing a key.  If this is set, `key` will be
            initialized to the contents of the file.
        """
        super().__init__(**kwargs)
        self._check_packers()
        # Packed form of an empty dict, used by serialize() for None content.
        self.none = self.pack({})
        # ensure self._session_default() if necessary, so bsession is defined:
        self.session  # noqa
        # Remember the creating process so send() can detect use after fork.
        self.pid = os.getpid()
        self._new_auth()
        if not self.key:
            get_logger().warning(
                "Message signing is disabled.  This is insecure and not recommended!"
            )

    def clone(self) -> Session:
        """Create a copy of this Session

        Useful when connecting multiple times to a given kernel.
        This prevents a shared digest_history warning about duplicate digests
        due to multiple connections to IOPub in the same process.

        .. versionadded:: 5.1
        """
        # make a copy
        new_session = type(self)()
        for name in self.traits():
            setattr(new_session, name, getattr(self, name))
        # fork digest_history: the loop above shared the same set object
        new_session.digest_history = set()
        new_session.digest_history.update(self.digest_history)
        return new_session

    # Number of ids handed out by ``msg_id`` so far.
    message_count = 0

    @property
    def msg_id(self) -> str:
        """A new message id: ``<session>_<pid>_<counter>``.

        Every access increments ``message_count``.
        """
        message_number = self.message_count
        self.message_count += 1
        return f"{self.session}_{os.getpid()}_{message_number}"

    def _check_packers(self) -> None:
        """Validate ``pack``/``unpack`` and add datetime squashing if needed.

        Checks that ``pack`` handles a simple message and returns bytes, and
        that ``unpack`` restores the original.  Then a datetime is round
        tripped: if that fails, or yields a datetime object, ``pack`` is
        wrapped so that dates are squashed before packing.

        Raises
        ------
        ValueError
            If packing fails, the packed value is not bytes, or unpacking
            fails or does not reproduce the input.
        """
        pack = self.pack
        unpack = self.unpack

        # check simple serialization
        msg_list = {"a": [1, "hi"]}
        try:
            packed = pack(msg_list)
        except Exception as e:
            msg = f"packer '{self.packer}' could not serialize a simple message: {e}"
            raise ValueError(msg) from e

        # ensure packed message is bytes
        if not isinstance(packed, bytes):
            raise ValueError("message packed to %r, but bytes are required" % type(packed))

        # check that unpack is pack's inverse
        try:
            unpacked = unpack(packed)
            if unpacked != msg_list:
                # Explicit check instead of `assert`, which is removed when
                # Python runs with -O and would silently skip validation.
                mismatch = f"round trip produced {unpacked!r}"
                raise ValueError(mismatch)
        except Exception as e:
            msg = f"unpacker {self.unpacker!r} could not handle output from packer {self.packer!r}: {e}"
            raise ValueError(msg) from e

        # check datetime support
        msg_datetime = {"t": utcnow()}
        try:
            unpacked = unpack(pack(msg_datetime))
            if isinstance(unpacked["t"], datetime):
                msg = "Shouldn't deserialize to datetime"
                raise ValueError(msg)
        except Exception:
            # Either the packer cannot handle datetimes or the round trip
            # restores them as datetime objects: squash dates before packing.
            self.pack = lambda o: pack(squash_dates(o))
            self.unpack = lambda s: unpack(s)

    def msg_header(self, msg_type: str) -> dict[str, t.Any]:
        """Create a header for a message type."""
        return msg_header(self.msg_id, msg_type, self.username, self.session)

    def msg(
        self,
        msg_type: str,
        content: dict | None = None,
        parent: dict[str, t.Any] | None = None,
        header: dict[str, t.Any] | None = None,
        metadata: dict[str, t.Any] | None = None,
    ) -> dict[str, t.Any]:
        """Return the nested message dict.

        This format is different from what is sent over the wire. The
        serialize/deserialize methods converts this nested message dict to the wire
        format, which is a list of message parts.

        A new header is generated from *msg_type* unless *header* is given.
        The message metadata is a copy of ``self.metadata`` updated with
        *metadata*.
        """
        msg = {}
        header = self.msg_header(msg_type) if header is None else header
        msg["header"] = header
        msg["msg_id"] = header["msg_id"]
        msg["msg_type"] = header["msg_type"]
        msg["parent_header"] = {} if parent is None else extract_header(parent)
        msg["content"] = {} if content is None else content
        msg["metadata"] = self.metadata.copy()
        if metadata is not None:
            msg["metadata"].update(metadata)
        return msg

    def sign(self, msg_list: list) -> bytes:
        """Sign a message with HMAC digest. If no auth, return b''.

        Parameters
        ----------
        msg_list : list
            The [p_header,p_parent,p_metadata,p_content] part of the message list.

        Returns
        -------
        bytes
            The hex digest, encoded to bytes.
        """
        if self.auth is None:
            return b""
        # Copy the keyed template so each message starts from a clean state.
        h = self.auth.copy()
        for m in msg_list:
            h.update(m)
        return h.hexdigest().encode()

    def serialize(
        self,
        msg: dict[str, t.Any],
        ident: list[bytes] | bytes | None = None,
    ) -> list[bytes]:
        """Serialize the message components to bytes.

        This is roughly the inverse of deserialize. The serialize/deserialize
        methods work with full message lists, whereas pack/unpack work with
        the individual message parts in the message list.

        Parameters
        ----------
        msg : dict or Message
            The next message dict as returned by the self.msg method.
        ident : bytes or list of bytes or None
            Routing identities to place before the delimiter.

        Returns
        -------
        msg_list : list
            The list of bytes objects to be sent with the format::

                [ident1, ident2, ..., DELIM, HMAC, p_header, p_parent,
                 p_metadata, p_content, buffer1, buffer2, ...]

            In this list, the ``p_*`` entities are the packed or serialized
            versions, so if JSON is used, these are utf8 encoded JSON strings.

        Raises
        ------
        TypeError
            If the message content is not None, a dict, bytes or str.
        """
        content = msg.get("content", {})
        if content is None:
            content = self.none
        elif isinstance(content, dict):
            content = self.pack(content)
        elif isinstance(content, bytes):
            # content is already packed, as in a relayed message
            pass
        elif isinstance(content, str):
            # should be bytes, but JSON often spits out unicode
            content = content.encode("utf8")
        else:
            raise TypeError("Content incorrect type: %s" % type(content))

        real_message = [
            self.pack(msg["header"]),
            self.pack(msg["parent_header"]),
            self.pack(msg["metadata"]),
            content,
        ]

        to_send = []

        if isinstance(ident, list):
            # accept list of idents
            to_send.extend(ident)
        elif ident is not None:
            to_send.append(ident)
        to_send.append(DELIM)

        signature = self.sign(real_message)
        to_send.append(signature)

        to_send.extend(real_message)

        return to_send

    def send(
        self,
        stream: zmq.sugar.socket.Socket | ZMQStream | None,
        msg_or_type: dict[str, t.Any] | str,
        content: dict[str, t.Any] | None = None,
        parent: dict[str, t.Any] | None = None,
        ident: bytes | list[bytes] | None = None,
        buffers: list[bytes | memoryview[bytes]] | None = None,
        track: bool = False,
        header: dict[str, t.Any] | None = None,
        metadata: dict[str, t.Any] | None = None,
    ) -> dict[str, t.Any] | None:
        """Build and send a message via stream or socket.

        The message format used by this function internally is as follows:

        [ident1,ident2,...,DELIM,HMAC,p_header,p_parent,p_metadata,p_content,
         buffer1,buffer2,...]

        The serialize/deserialize methods convert the nested message dict into this
        format.

        Parameters
        ----------

        stream : zmq.Socket or ZMQStream
            The socket-like object used to send the data.  If falsy, the
            message is built but nothing is sent.
        msg_or_type : str or Message/dict
            Normally, msg_or_type will be a msg_type unless a message is being
            sent more than once. If a header is supplied, this can be set to
            None and the msg_type will be pulled from the header.

        content : dict or None
            The content of the message (ignored if msg_or_type is a message).
        header : dict or None
            The header dict for the message (ignored if msg_or_type is a message).
        parent : Message or dict or None
            The parent or parent header describing the parent of this message
            (ignored if msg_or_type is a message).
        ident : bytes or list of bytes
            The zmq.IDENTITY routing path.
        metadata : dict or None
            The metadata describing the message
        buffers : list or None
            The already-serialized buffers to be appended to the message.
        track : bool
            Whether to track.  Only for use with Sockets, because ZMQStream
            objects cannot track messages.


        Returns
        -------
        msg : dict
            The constructed message, with the send tracker stored under
            ``"tracker"``.  None is returned (and nothing is sent) when
            ``check_pid`` is enabled and the call comes from another process.

        Raises
        ------
        TypeError
            If a buffer does not support the buffer protocol.
        ValueError
            If a buffer is not contiguous.
        """
        if not isinstance(stream, zmq.Socket):
            # ZMQStreams and dummy sockets do not support tracking.
            track = False

        if isinstance(stream, zmq.asyncio.Socket):
            # Send synchronously through a shadow of the async socket.
            stream = zmq.Socket.shadow(stream.underlying)

        # A tuple (rather than `Message | dict`) keeps this working on Python
        # versions where isinstance() does not accept union types.
        if isinstance(msg_or_type, (Message, dict)):
            # We got a Message or message dict, not a msg_type so don't
            # build a new Message.
            # NOTE(review): the Message class in this module defines neither
            # .get() nor item assignment, both of which are used on `msg`
            # below; only dict input looks fully supported. Confirm.
            msg = msg_or_type
            buffers = buffers or msg.get("buffers", [])
        else:
            msg = self.msg(
                msg_or_type,
                content=content,
                parent=parent,
                header=header,
                metadata=metadata,
            )
        if self.check_pid and os.getpid() != self.pid:
            get_logger().warning("WARNING: attempted to send message from fork\n%s", msg)
            return None
        buffers = [] if buffers is None else buffers
        buffer_sizes = []
        for idx, buf in enumerate(buffers):
            if isinstance(buf, memoryview):
                view = buf
            else:
                try:
                    # check to see if buf supports the buffer protocol.
                    view = memoryview(buf)
                except TypeError as e:
                    emsg = "Buffer objects must support the buffer protocol."
                    raise TypeError(emsg) from e
            if not view.contiguous:
                # zmq requires memoryviews to be contiguous
                raise ValueError("Buffer %i (%r) is not contiguous" % (idx, buf))
            # Size in bytes: len() of a buffer counts items, not bytes.
            buffer_sizes.append(view.nbytes)

        if self.adapt_version:
            msg = adapt(msg, self.adapt_version)
        to_send = self.serialize(msg, ident)
        # copy_threshold is expressed in bytes, so compare byte sizes.
        longest = max(
            max(len(part) for part in to_send),
            max(buffer_sizes, default=0),
        )
        to_send.extend(buffers)  # type: ignore[arg-type]
        copy = longest < self.copy_threshold

        if stream and buffers and track and not copy:
            # only really track when we are doing zero-copy buffers
            tracker = stream.send_multipart(to_send, copy=False, track=True)
        elif stream:
            # use dummy tracker, which will be done immediately
            tracker = DONE
            stream.send_multipart(to_send, copy=copy)
        else:
            tracker = DONE

        if self.debug:
            pprint.pprint(msg)  # noqa
            pprint.pprint(to_send)  # noqa
            pprint.pprint(buffers)  # noqa

        msg["tracker"] = tracker

        return msg

    def send_raw(
        self,
        stream: zmq.sugar.socket.Socket,
        msg_list: list,
        flags: int = 0,
        copy: bool = True,
        ident: bytes | list[bytes] | None = None,
    ) -> None:
        """Send a raw message via ident path.

        This method is used to send a already serialized message.

        Parameters
        ----------
        stream : ZMQStream or Socket
            The ZMQ stream or socket to use for sending the message.
        msg_list : list
            The serialized list of messages to send.  This only includes the
            [p_header,p_parent,p_metadata,p_content,buffer1,buffer2,...] portion of
            the message.
        flags : int
            Passed through to ``send_multipart``.
        copy : bool
            Passed through to ``send_multipart``.
        ident : ident or list
            A single ident or a list of idents to use in sending.
        """
        to_send = []
        if isinstance(ident, bytes):
            ident = [ident]
        if ident is not None:
            to_send.extend(ident)

        to_send.append(DELIM)
        # Don't include buffers in signature (per spec).
        to_send.append(self.sign(msg_list[0:4]))
        to_send.extend(msg_list)
        if isinstance(stream, zmq.asyncio.Socket):
            stream = zmq.Socket.shadow(stream.underlying)
        stream.send_multipart(to_send, flags, copy=copy)

    def recv(
        self,
        socket: zmq.sugar.socket.Socket,
        mode: int = zmq.NOBLOCK,
        content: bool = True,
        copy: bool = True,
    ) -> tuple[list[bytes] | None, dict[str, t.Any] | None]:
        """Receive and unpack a message.

        Parameters
        ----------
        socket : ZMQStream or Socket
            The socket or stream to use in receiving.
        mode : int
            Flags passed to ``recv_multipart`` (non-blocking by default).
        content : bool
            Whether to unpack the content (see ``deserialize``).
        copy : bool
            Whether to receive bytes (True) or non-copying Message objects.

        Returns
        -------
        [idents], msg
            [idents] is a list of idents and msg is a nested message dict of
            same format as self.msg returns.  ``(None, None)`` is returned
            when nothing is available (EAGAIN).  Other ZMQ errors and any
            error raised while deserializing propagate to the caller.
        """
        if isinstance(socket, ZMQStream):  # type:ignore[unreachable]
            socket = socket.socket  # type:ignore[unreachable]
        if isinstance(socket, zmq.asyncio.Socket):
            socket = zmq.Socket.shadow(socket.underlying)

        try:
            msg_list = socket.recv_multipart(mode, copy=copy)
        except zmq.ZMQError as e:
            if e.errno == zmq.EAGAIN:
                # We can convert EAGAIN to None as we know in this case
                # recv_multipart won't return None.
                return None, None
            else:
                raise
        # split multipart message into identity list and message dict
        # invalid large messages can cause very expensive string comparisons
        idents, msg_list = self.feed_identities(msg_list, copy)
        return idents, self.deserialize(msg_list, content=content, copy=copy)

    def feed_identities(
        self, msg_list: list[bytes] | list[zmq.Message], copy: bool = True
    ) -> tuple[list[bytes], list[bytes] | list[zmq.Message]]:
        """Split the identities from the rest of the message.

        Feed until DELIM is reached, then return the prefix as idents and
        remainder as msg_list. This is easily broken by setting an IDENT to DELIM,
        but that would be silly.

        Parameters
        ----------
        msg_list : a list of Message or bytes objects
            The message to be split.
        copy : bool
            flag determining whether the arguments are bytes or Messages

        Returns
        -------
        (idents, msg_list) : two lists
            idents will always be a list of bytes, each of which is a ZMQ
            identity. msg_list will be a list of bytes or zmq.Messages of the
            form [HMAC,p_header,p_parent,p_metadata,p_content,buffer1,buffer2,...]
            and should be unpackable/unserializable via self.deserialize at this
            point.

        Raises
        ------
        ValueError
            If DELIM is not found in msg_list.
        """
        if copy:
            msg_list = t.cast(t.List[bytes], msg_list)
            idx = msg_list.index(DELIM)
            return msg_list[:idx], msg_list[idx + 1 :]

        msg_list = t.cast(t.List[zmq.Message], msg_list)
        for idx, m in enumerate(msg_list):  # noqa
            if m.bytes == DELIM:
                break
        else:
            # Loop finished without finding the delimiter (or list was empty).
            msg = "DELIM not in msg_list"
            raise ValueError(msg)
        idents, msg_list = msg_list[:idx], msg_list[idx + 1 :]
        return [bytes(m.bytes) for m in idents], msg_list

    def _add_digest(self, signature: bytes) -> None:
        """add a digest to history to protect against replay attacks"""
        if self.digest_history_size == 0:
            # no history, never add digests
            return

        self.digest_history.add(signature)
        if len(self.digest_history) > self.digest_history_size:
            # threshold reached, cull 10%
            self._cull_digest_history()

    def _cull_digest_history(self) -> None:
        """cull the digest history

        Removes a randomly selected 10% of the digest history, or as many
        entries as needed to get back to ``digest_history_size`` if that is
        more.
        """
        current = len(self.digest_history)
        n_to_cull = max(current // 10, current - self.digest_history_size)
        if n_to_cull >= current:
            self.digest_history = set()
            return
        # random.sample needs a sequence, so the set is sorted into a tuple.
        to_cull = random.sample(tuple(sorted(self.digest_history)), n_to_cull)
        self.digest_history.difference_update(to_cull)

    def deserialize(
        self,
        msg_list: list[bytes] | list[zmq.Message],
        content: bool = True,
        copy: bool = True,
    ) -> dict[str, t.Any]:
        """Unserialize a msg_list to a nested message dict.

        This is roughly the inverse of serialize. The serialize/deserialize
        methods work with full message lists, whereas pack/unpack work with
        the individual message parts in the message list.

        Parameters
        ----------
        msg_list : list of bytes or Message objects
            The list of message parts of the form [HMAC,p_header,p_parent,
            p_metadata,p_content,buffer1,buffer2,...].
        content : bool (True)
            Whether to unpack the content dict (True), or leave it packed
            (False).
        copy : bool (True)
            Whether msg_list contains bytes (True) or the non-copying Message
            objects in each place (False).

        Returns
        -------
        msg : dict
            The nested message dict with top-level keys [header, msg_id,
            msg_type, parent_header, metadata, content, buffers], passed
            through ``adapt``.  The buffers are returned as memoryviews.

        Raises
        ------
        ValueError
            When signing is enabled and the message is unsigned, its
            signature has been seen before, or its signature is invalid.
        TypeError
            If the message has fewer than 5 parts.
        """
        minlen = 5
        message = {}
        if not copy:
            # pyzmq didn't copy the first parts of the message, so we'll do it
            msg_list = t.cast(t.List[zmq.Message], msg_list)
            msg_list_beginning = [bytes(msg.bytes) for msg in msg_list[:minlen]]
            msg_list = t.cast(t.List[bytes], msg_list)
            msg_list = msg_list_beginning + msg_list[minlen:]
        msg_list = t.cast(t.List[bytes], msg_list)
        if self.auth is not None:
            signature = msg_list[0]
            if not signature:
                msg = "Unsigned Message"
                raise ValueError(msg)
            if signature in self.digest_history:
                raise ValueError("Duplicate Signature: %r" % signature)
            check = self.sign(msg_list[1:5])
            if not compare_digest(signature, check):
                msg = "Invalid Signature: %r" % signature
                raise ValueError(msg)
            if content:
                # Only store signature if we are unpacking content, don't store if just peeking.
                # Stored only after verification, so forged signatures cannot
                # fill the history and push out digests of genuine messages.
                self._add_digest(signature)
        if not len(msg_list) >= minlen:
            msg = "malformed message, must have at least %i elements" % minlen
            raise TypeError(msg)
        header = self.unpack(msg_list[1])
        message["header"] = extract_dates(header)
        message["msg_id"] = header["msg_id"]
        message["msg_type"] = header["msg_type"]
        message["parent_header"] = extract_dates(self.unpack(msg_list[2]))
        message["metadata"] = self.unpack(msg_list[3])
        if content:
            message["content"] = self.unpack(msg_list[4])
        else:
            message["content"] = msg_list[4]
        buffers = [memoryview(b) for b in msg_list[5:]]
        if buffers and buffers[0].shape is None:
            # force copy to workaround pyzmq #646
            msg_list = t.cast(t.List[zmq.Message], msg_list)
            buffers = [memoryview(bytes(b.bytes)) for b in msg_list[5:]]
        message["buffers"] = buffers
        if self.debug:
            pprint.pprint(message)  # noqa
        # adapt to the current version
        return adapt(message)

    def unserialize(self, *args: t.Any, **kwargs: t.Any) -> dict[str, t.Any]:
        """**DEPRECATED** Use deserialize instead."""
        # pragma: no cover
        warnings.warn(
            "Session.unserialize is deprecated. Use Session.deserialize.",
            DeprecationWarning,
            stacklevel=2,
        )
        return self.deserialize(*args, **kwargs)