import builtins
from typing import *
import hail as hl
from hail.expr.expressions import *
from hail.expr.expressions.expression_typecheck import *
from hail.expr.types import *
from hail.genetics.reference_genome import reference_genome_type, ReferenceGenome
from hail.ir import *
from hail.typecheck import *
from hail.utils.java import Env
# Type variables constrained to the collection / numeric expression classes.
# The string passed to TypeVar must equal the name of the variable it is
# assigned to; static type checkers reject a mismatch.
Coll_T = TypeVar('Coll_T', ArrayExpression, SetExpression)
Num_T = TypeVar('Num_T', Int32Expression, Int64Expression, Float32Expression, Float64Expression)
def _func(name, ret_type, *args):
    """Construct an expression applying the function `name` to `args`.

    The indices and aggregations of all arguments are unified, and the IR of
    each argument is passed, in order, to an ``Apply`` node. The resulting
    expression is given the type `ret_type`.
    """
    indices, aggregations = unify_all(*args)
    arg_irs = [arg._ir for arg in args]
    apply_ir = Apply(name, *arg_irs)
    return construct_expr(apply_ir, ret_type, indices, aggregations)
def _seeded_func(name, ret_type, seed, *args):
    """Construct an expression applying the seeded function `name` to `args`.

    When `seed` is ``None`` a seed is drawn from ``Env.next_seed()``;
    otherwise the supplied seed is used unchanged. The IR of each argument is
    passed, in order, to an ``ApplySeeded`` node.
    """
    if seed is None:
        seed = Env.next_seed()
    indices, aggregations = unify_all(*args)
    arg_irs = [arg._ir for arg in args]
    seeded_ir = ApplySeeded(name, seed, *arg_irs)
    return construct_expr(seeded_ir, ret_type, indices, aggregations)
@typecheck(t=hail_type)
def null(t: Union[HailType, str]):
    """Build a missing-valued expression of the given type.

    Examples
    --------

    >>> hl.eval(hl.null(hl.tarray(hl.tstr)))
    None

    >>> hl.eval(hl.null('array<str>'))
    None

    Notes
    -----
    :obj:`None` cannot be interpreted as an expression by itself, so use this
    function whenever an expression needs to contain a missing value.

    Parameters
    ----------
    t : :obj:`str` or :class:`.HailType`
        Type of the missing expression.

    Returns
    -------
    :class:`.Expression`
        A missing expression of type `t`.
    """
    missing_ir = NA(t)
    return construct_expr(missing_ir, t)
@typecheck(x=anytype, dtype=nullable(hail_type))
def literal(x: Any, dtype: Optional[Union[HailType, str]] = None):
    """Capture a Python value and broadcast it as an expression.

    Examples
    --------

    >>> table = hl.utils.range_table(8)
    >>> greetings = hl.literal({1: 'Good morning', 4: 'Good afternoon', 6 : 'Good evening'})
    >>> table.annotate(greeting = greetings.get(table.idx)).show()
    +-------+----------------+
    |   idx | greeting       |
    +-------+----------------+
    | int32 | str            |
    +-------+----------------+
    |     0 | NA             |
    |     1 | Good morning   |
    |     2 | NA             |
    |     3 | NA             |
    |     4 | Good afternoon |
    |     5 | NA             |
    |     6 | Good evening   |
    |     7 | NA             |
    +-------+----------------+

    Notes
    -----
    This function is meant for capturing large Python objects for use in
    expressions, as an alternative to adding the object as a global
    annotation on a :class:`.Table` or :class:`.MatrixTable`.

    If `x` contains any :class:`.Expression`, the whole object is converted
    to an expression, evaluated with :func:`.eval`, and the result is
    captured instead.

    Parameters
    ----------
    x
        Object to capture and broadcast as an expression.
    dtype : :obj:`str` or :class:`.HailType`, optional
        Type of `x`. Imputed from `x` when omitted.

    Returns
    -------
    :class:`.Expression`

    Raises
    ------
    TypeError
        If `x` (or an expression nested inside it) does not match `dtype`.
    ExpressionException
        If an expression nested inside `x` is indexed by rows or columns.
    """
    found_expr = False

    def typecheck_expr(t, obj):
        # Visitor handed to ``dtype._traverse``: validates one level of the
        # object and reports whether traversal should descend further.
        nonlocal found_expr
        if obj is None:
            return False
        if not isinstance(obj, Expression):
            t._typecheck_one_level(obj)
            return True
        found_expr = True
        if obj.dtype != t:
            raise TypeError(f"'literal': type mismatch: expected '{t}', found '{obj.dtype}'")
        if obj._indices.source is not None and obj._indices.axes:
            raise ExpressionException(f"'literal' can only accept scalar or global expression arguments,"
                                      f" found indices {obj._indices.axes}")
        return False

    if dtype is None:
        dtype = impute_type(x)

    try:
        dtype._traverse(x, typecheck_expr)
    except TypeError as e:
        raise TypeError("'literal': object did not match the passed type '{}'"
                        .format(dtype)) from e

    if found_expr:
        # Embedded expressions are evaluated to plain Python values first.
        return literal(hl.eval(to_expr(x, dtype)), dtype)

    if x is None:
        return hl.null(dtype)

    if not is_primitive(dtype):
        return construct_expr(Literal(dtype, x), dtype)

    if dtype == tint32:
        assert isinstance(x, builtins.int)
        assert tint32.min_value <= x <= tint32.max_value
        return construct_expr(I32(x), tint32)
    if dtype == tint64:
        assert isinstance(x, builtins.int)
        assert tint64.min_value <= x <= tint64.max_value
        return construct_expr(I64(x), tint64)
    if dtype == tfloat32:
        assert isinstance(x, (builtins.float, builtins.int))
        return construct_expr(F32(x), tfloat32)
    if dtype == tfloat64:
        assert isinstance(x, (builtins.float, builtins.int))
        return construct_expr(F64(x), tfloat64)
    if dtype == tbool:
        assert isinstance(x, builtins.bool)
        bool_ir = TrueIR() if x else FalseIR()
        return construct_expr(bool_ir, tbool)

    # The only primitive type left is str.
    assert dtype == tstr
    assert isinstance(x, builtins.str)
    return construct_expr(Str(x), tstr)
@typecheck(condition=expr_bool, consequent=expr_any, alternate=expr_any, missing_false=bool)
def cond(condition,
         consequent,
         alternate,
         missing_false: bool = False):
    """Expression for an if/else statement: evaluate a condition and pick one of two branches.

    Examples
    --------

    >>> x = 5
    >>> hl.eval(hl.cond(x < 2, 'Hi', 'Bye'))
    'Bye'

    >>> a = hl.literal([1, 2, 3, 4])
    >>> hl.eval(hl.cond(hl.len(a) > 0, 2.0 * a, a / 2.0))
    [2.0, 4.0, 6.0, 8.0]

    Notes
    -----
    Returns `consequent` when `condition` is ``True`` and `alternate` when
    `condition` is ``False``. When `condition` is missing the result is
    missing, unless `missing_false` is set, in which case a missing
    `condition` is treated as ``False``.

    Note
    ----
    The type of `consequent` and `alternate` must be the same.

    Parameters
    ----------
    condition : :class:`.BooleanExpression`
        Condition to test.
    consequent : :class:`.Expression`
        Branch to return if the condition is ``True``.
    alternate : :class:`.Expression`
        Branch to return if the condition is ``False``.
    missing_false : :obj:`bool`
        If ``True``, treat missing `condition` as ``False``.

    See Also
    --------
    :func:`.case`, :func:`.switch`

    Returns
    -------
    :class:`.Expression`
        One of `consequent`, `alternate`, or missing, based on `condition`.

    Raises
    ------
    TypeError
        If `consequent` and `alternate` cannot be unified to a common type.
    """
    if missing_false:
        # A missing condition becomes False: is_defined(c) & c.
        condition = hl.bind(lambda c: hl.is_defined(c) & c, condition)

    indices, aggregations = unify_all(condition, consequent, alternate)
    consequent, alternate, unified = unify_exprs(consequent, alternate)

    if not unified:
        message = (f"'cond' requires the 'consequent' and 'alternate' arguments to have the same type\n"
                   f" consequent: type '{consequent.dtype}'\n"
                   f" alternate: type '{alternate.dtype}'")
        raise TypeError(message)
    assert consequent.dtype == alternate.dtype

    if_ir = If(condition._ir, consequent._ir, alternate._ir)
    return construct_expr(if_ir, consequent.dtype, indices, aggregations)
def case(missing_false: bool=False) -> 'hail.expr.builders.CaseBuilder':
    """Start a chain of if-else tests using a :class:`.CaseBuilder`.

    Examples
    --------

    >>> x = hl.literal('foo bar baz')
    >>> expr = (hl.case()
    ...                  .when(x[:3] == 'FOO', 1)
    ...                  .when(hl.len(x) == 11, 2)
    ...                  .when(x == 'secret phrase', 3)
    ...                  .default(0))
    >>> hl.eval(expr)
    2

    Parameters
    ----------
    missing_false : :obj:`bool`
        Treat missing predicates as ``False``.

    See Also
    --------
    :class:`.CaseBuilder`, :func:`.switch`, :func:`.cond`

    Returns
    -------
    :class:`.CaseBuilder`.
    """
    # Imported inside the function, as in the original definition.
    from .builders import CaseBuilder
    builder = CaseBuilder(missing_false=missing_false)
    return builder
@typecheck(expr=expr_any)
def switch(expr) -> 'hail.expr.builders.SwitchBuilder':
    """Start a conditional tree that branches on the value of an expression.

    Examples
    --------

    >>> csq = hl.literal('loss of function')
    >>> expr = (hl.switch(csq)
    ...                  .when('synonymous', 1)
    ...                  .when('SYN', 1)
    ...                  .when('missense', 2)
    ...                  .when('MIS', 2)
    ...                  .when('loss of function', 3)
    ...                  .when('LOF', 3)
    ...                  .or_missing())
    >>> hl.eval(expr)
    3

    See Also
    --------
    :class:`.SwitchBuilder`, :func:`.case`, :func:`.cond`

    Parameters
    ----------
    expr : :class:`.Expression`
        Value to match against.

    Returns
    -------
    :class:`.SwitchBuilder`
    """
    # Imported inside the function, as in the original definition.
    from .builders import SwitchBuilder
    builder = SwitchBuilder(expr)
    return builder
@typecheck(f=anytype, exprs=expr_any)
def bind(f: Callable, *exprs):
    """Bind expressions to temporary variables and use them in a function.

    Examples
    --------

    >>> hl.eval(hl.bind(lambda x: x + 1, 1))
    2

    :func:`.bind` also can take multiple arguments:

    >>> hl.eval(hl.bind(lambda x, y: x / y, x, x))
    1.0

    Parameters
    ----------
    f : function ( (args) -> :class:`.Expression`)
        Function of `exprs`.
    exprs : variable-length args of :class:`.Expression`
        Expressions to bind.

    Returns
    -------
    :class:`.Expression`
        Result of evaluating `f` with `exprs` as arguments.
    """
    variables = []
    bindings = []  # (uid, bound IR) pairs, in argument order
    for expr in exprs:
        uid = Env.get_uid()
        variables.append(construct_variable(uid, expr._type, expr._indices, expr._aggregations))
        bindings.append((uid, expr._ir))

    # `f` sees only the placeholder variables, never the original expressions.
    lambda_result = to_expr(f(*variables))
    indices, aggregations = unify_all(*exprs, lambda_result)

    # Wrap the body in one Let per binding; the last binding ends up outermost.
    body_ir = lambda_result._ir
    for uid, bound_ir in bindings:
        body_ir = Let(uid, bound_ir, body_ir)

    return construct_expr(body_ir, lambda_result.dtype, indices, aggregations)
@typecheck(c1=expr_int32, c2=expr_int32, c3=expr_int32, c4=expr_int32)
def chi_squared_test(c1, c2, c3, c4) -> StructExpression:
    """Run a chi-squared test of independence on a 2x2 contingency table.

    Examples
    --------

    >>> hl.eval(hl.chi_squared_test(10, 10, 10, 10))
    Struct(p_value=1.0, odds_ratio=1.0)

    >>> hl.eval(hl.chi_squared_test(51, 43, 22, 92))
    Struct(p_value=1.4626257805267089e-07, odds_ratio=4.959830866807611)

    Notes
    -----
    The odds ratio is given by ``(c1 / c2) / (c3 / c4)``.

    Returned fields may be ``nan`` or ``inf``.

    Parameters
    ----------
    c1 : int or :class:`.Expression` of type :py:data:`.tint32`
        Value for cell 1.
    c2 : int or :class:`.Expression` of type :py:data:`.tint32`
        Value for cell 2.
    c3 : int or :class:`.Expression` of type :py:data:`.tint32`
        Value for cell 3.
    c4 : int or :class:`.Expression` of type :py:data:`.tint32`
        Value for cell 4.

    Returns
    -------
    :class:`.StructExpression`
        A :class:`.tstruct` expression with two fields, `p_value`
        (:py:data:`.tfloat64`) and `odds_ratio` (:py:data:`.tfloat64`).
    """
    result_type = tstruct(p_value=tfloat64, odds_ratio=tfloat64)
    cells = (c1, c2, c3, c4)
    return _func("chi_squared_test", result_type, *cells)
@typecheck(c1=expr_int32, c2=expr_int32, c3=expr_int32, c4=expr_int32, min_cell_count=expr_int32)
def contingency_table_test(c1, c2, c3, c4, min_cell_count) -> StructExpression:
    """Run a chi-squared or Fisher's exact test of independence on a 2x2
    contingency table.

    Examples
    --------

    >>> hl.eval(hl.contingency_table_test(51, 43, 22, 92, min_cell_count=22))
    Struct(p_value=1.4626257805267089e-07, odds_ratio=4.959830866807611)

    >>> hl.eval(hl.contingency_table_test(51, 43, 22, 92, min_cell_count=23))
    Struct(p_value=2.1564999740157304e-07, odds_ratio=4.918058171469967)

    Notes
    -----
    The chi-squared test is used when every cell count is at least
    `min_cell_count`; Fisher's exact test is used otherwise.

    Returned fields may be ``nan`` or ``inf``.

    Parameters
    ----------
    c1 : int or :class:`.Expression` of type :py:data:`.tint32`
        Value for cell 1.
    c2 : int or :class:`.Expression` of type :py:data:`.tint32`
        Value for cell 2.
    c3 : int or :class:`.Expression` of type :py:data:`.tint32`
        Value for cell 3.
    c4 : int or :class:`.Expression` of type :py:data:`.tint32`
        Value for cell 4.
    min_cell_count : int or :class:`.Expression` of type :py:data:`.tint32`
        Minimum count in every cell to use the chi-squared test.

    Returns
    -------
    :class:`.StructExpression`
        A :class:`.tstruct` expression with two fields, `p_value`
        (:py:data:`.tfloat64`) and `odds_ratio` (:py:data:`.tfloat64`).
    """
    result_type = tstruct(p_value=tfloat64, odds_ratio=tfloat64)
    cells = (c1, c2, c3, c4)
    return _func("contingency_table_test", result_type, *cells, min_cell_count)
@typecheck(collection=expr_oneof(expr_dict(),
                                 expr_set(expr_tuple([expr_any, expr_any])),
                                 expr_array(expr_tuple([expr_any, expr_any]))))
def dict(collection) -> DictExpression:
    """Create a dictionary expression.

    Examples
    --------

    >>> hl.eval(hl.dict([('foo', 1), ('bar', 2), ('baz', 3)]))
    {'bar': 2, 'baz': 3, 'foo': 1}

    Notes
    -----
    Arrays and sets passed to this function must have elements of type
    :class:`.ttuple` with 2 fields: the first field becomes the key and the
    second becomes the value. A dictionary expression is returned unchanged.

    Parameters
    ----------
    collection : :class:`.DictExpression` or :class:`.ArrayExpression` or :class:`.SetExpression`

    Returns
    -------
    :class:`.DictExpression`
    """
    if isinstance(collection.dtype, (tarray, tset)):
        key_type, value_type = collection.dtype.element_type.types
        dict_type = tdict(key_type, value_type)
        return _func('dict', dict_type, collection)

    # The typecheck decorator only admits dicts besides arrays and sets.
    assert isinstance(collection.dtype, tdict)
    return collection
@typecheck(x=expr_float64, a=expr_float64, b=expr_float64)
def dbeta(x, a, b) -> Float64Expression:
    """
    Probability density at `x` of a `beta distribution
    <https://en.wikipedia.org/wiki/Beta_distribution>`__ with parameters `a`
    (alpha) and `b` (beta).

    Examples
    --------

    >>> hl.eval(hl.dbeta(.2, 5, 20))
    4.900377563180943

    Parameters
    ----------
    x : :obj:`float` or :class:`.Expression` of type :py:data:`.tfloat64`
        Point in [0,1] at which to sample. If a < 1 then x must be positive.
        If b < 1 then x must be less than 1.
    a : :obj:`float` or :class:`.Expression` of type :py:data:`.tfloat64`
        The alpha parameter in the beta distribution. The result is undefined
        for non-positive a.
    b : :obj:`float` or :class:`.Expression` of type :py:data:`.tfloat64`
        The beta parameter in the beta distribution. The result is undefined
        for non-positive b.

    Returns
    -------
    :class:`.Float64Expression`
    """
    density = _func("dbeta", tfloat64, x, a, b)
    return density
@typecheck(x=expr_float64, lamb=expr_float64, log_p=expr_bool)
def dpois(x, lamb, log_p=False) -> Float64Expression:
    """(Log) probability density at `x` of a Poisson distribution with rate parameter `lamb`.

    Examples
    --------

    >>> hl.eval(hl.dpois(5, 3))
    0.10081881344492458

    Parameters
    ----------
    x : :obj:`float` or :class:`.Expression` of type :py:data:`.tfloat64`
        Non-negative number at which to compute the probability density.
    lamb : :obj:`float` or :class:`.Expression` of type :py:data:`.tfloat64`
        Poisson rate parameter. Must be non-negative.
    log_p : :obj:`bool` or :class:`.BooleanExpression`
        If true, the natural logarithm of the probability density is returned.

    Returns
    -------
    :class:`.Expression` of type :py:data:`.tfloat64`
        The (log) probability density.
    """
    density = _func("dpois", tfloat64, x, lamb, log_p)
    return density
@typecheck(x=expr_float64)
def exp(x) -> Float64Expression:
    """Raise `e` to the power `x`.

    Examples
    --------

    >>> hl.eval(hl.exp(2))
    7.38905609893065

    Parameters
    ----------
    x : float or :class:`.Expression` of type :py:data:`.tfloat64`

    Returns
    -------
    :class:`.Expression` of type :py:data:`.tfloat64`
    """
    result = _func("exp", tfloat64, x)
    return result
@typecheck(c1=expr_int32, c2=expr_int32, c3=expr_int32, c4=expr_int32)
def fisher_exact_test(c1, c2, c3, c4) -> StructExpression:
    """Compute the p-value, odds ratio, and 95% confidence interval of
    Fisher's exact test for a 2x2 table.

    Examples
    --------

    >>> hl.eval(hl.fisher_exact_test(10, 10, 10, 10))
    Struct(p_value=1.0000000000000002, odds_ratio=1.0,
           ci_95_lower=0.24385796914260355, ci_95_upper=4.100747675033819)

    >>> hl.eval(hl.fisher_exact_test(51, 43, 22, 92))
    Struct(p_value=2.1564999740157304e-07, odds_ratio=4.918058171469967,
           ci_95_lower=2.5659373368248444, ci_95_upper=9.677929632035475)

    Notes
    -----
    This method is identical to the version implemented in
    `R <https://stat.ethz.ch/R-manual/R-devel/library/stats/html/fisher.test.html>`_ with default
    parameters (two-sided, alpha = 0.05, null hypothesis that the odds ratio equals 1).

    Returned fields may be ``nan`` or ``inf``.

    Parameters
    ----------
    c1 : int or :class:`.Expression` of type :py:data:`.tint32`
        Value for cell 1.
    c2 : int or :class:`.Expression` of type :py:data:`.tint32`
        Value for cell 2.
    c3 : int or :class:`.Expression` of type :py:data:`.tint32`
        Value for cell 3.
    c4 : int or :class:`.Expression` of type :py:data:`.tint32`
        Value for cell 4.

    Returns
    -------
    :class:`.StructExpression`
        A :class:`.tstruct` expression with four fields, `p_value`
        (:py:data:`.tfloat64`), `odds_ratio` (:py:data:`.tfloat64`),
        `ci_95_lower` (:py:data:`.tfloat64`), and `ci_95_upper`
        (:py:data:`.tfloat64`).
    """
    result_type = tstruct(p_value=tfloat64,
                          odds_ratio=tfloat64,
                          ci_95_lower=tfloat64,
                          ci_95_upper=tfloat64)
    cells = (c1, c2, c3, c4)
    return _func("fisher_exact_test", result_type, *cells)
@typecheck(x=expr_oneof(expr_float32, expr_float64))
def floor(x):
    """Largest integral value that is less than or equal to `x`.

    The result has the same floating-point type as `x`.

    Examples
    --------

    >>> hl.eval(hl.floor(3.1))
    3.0

    Parameters
    ----------
    x : :class:`.Float32Expression` or :class:`.Float64Expression`

    Returns
    -------
    :class:`.Float32Expression` or :class:`.Float64Expression`
    """
    result_type = x.dtype
    return _func("floor", result_type, x)
@typecheck(x=expr_oneof(expr_float32, expr_float64))
def ceil(x):
    """Smallest integral value that is greater than or equal to `x`.

    The result has the same floating-point type as `x`.

    Examples
    --------

    >>> hl.eval(hl.ceil(3.1))
    4.0

    Parameters
    ----------
    x : :class:`.Float32Expression` or :class:`.Float64Expression`

    Returns
    -------
    :class:`.Float32Expression` or :class:`.Float64Expression`
    """
    result_type = x.dtype
    return _func("ceil", result_type, x)
@typecheck(n_hom_ref=expr_int32, n_het=expr_int32, n_hom_var=expr_int32)
def hardy_weinberg_test(n_hom_ref, n_het, n_hom_var) -> StructExpression:
    """Performs test of Hardy-Weinberg equilibrium.

    Examples
    --------

    >>> hl.eval(hl.hardy_weinberg_test(250, 500, 250))
    Struct(het_freq_hwe=0.5002501250625313, p_value=0.9747844394217698)

    >>> hl.eval(hl.hardy_weinberg_test(37, 200, 85))
    Struct(het_freq_hwe=0.48964964307448583, p_value=1.1337210383168987e-06)

    Notes
    -----
    This method performs a two-sided exact test with mid-p-value correction of
    `Hardy-Weinberg equilibrium <https://en.wikipedia.org/wiki/Hardy%E2%80%93Weinberg_principle>`__
    via an efficient implementation of the
    `Levene-Haldane distribution <https://hail.is/docs/devel/LeveneHaldane.pdf>`__,
    which models the number of heterozygous individuals under equilibrium.

    The mean of this distribution is ``(n_ref * n_var) / (2n - 1)``, where
    ``n_ref = 2 * n_hom_ref + n_het`` is the number of reference alleles,
    ``n_var = 2 * n_hom_var + n_het`` is the number of variant alleles, and
    ``n = n_hom_ref + n_het + n_hom_var`` is the number of individuals. So the
    expected frequency of heterozygotes under equilibrium, `het_freq_hwe`, is
    this mean divided by ``n``. For the first example above,
    ``(1000 * 1000) / 1999 / 1000 = 0.50025...``.

    Parameters
    ----------
    n_hom_ref : int or :class:`.Expression` of type :py:data:`.tint32`
        Number of homozygous reference genotypes.
    n_het : int or :class:`.Expression` of type :py:data:`.tint32`
        Number of heterozygous genotypes.
    n_hom_var : int or :class:`.Expression` of type :py:data:`.tint32`
        Number of homozygous variant genotypes.

    Returns
    -------
    :class:`.StructExpression`
        A struct expression with two fields, `het_freq_hwe`
        (:py:data:`.tfloat64`) and `p_value` (:py:data:`.tfloat64`).
    """
    ret_type = tstruct(het_freq_hwe=tfloat64,
                       p_value=tfloat64)
    return _func("hardy_weinberg_test", ret_type, n_hom_ref, n_het, n_hom_var)
@typecheck(contig=expr_str, pos=expr_int32,
           reference_genome=reference_genome_type)
def locus(contig, pos, reference_genome: Union[str, ReferenceGenome] = 'default') -> LocusExpression:
    """Build a locus expression from a chromosome and a position.

    Examples
    --------

    >>> hl.eval(hl.locus("1", 10000))
    Locus(contig=1, position=10000, reference_genome=GRCh37)

    Parameters
    ----------
    contig : str or :class:`.StringExpression`
        Chromosome.
    pos : int or :class:`.Expression` of type :py:data:`.tint32`
        Base position along the chromosome.
    reference_genome : :obj:`str` or :class:`.ReferenceGenome`
        Reference genome to use.

    Returns
    -------
    :class:`.LocusExpression`
    """
    locus_type = tlocus(reference_genome)
    function_name = f'Locus({reference_genome.name})'
    return _func(function_name, locus_type, contig, pos)
@typecheck(global_pos=expr_int64,
           reference_genome=reference_genome_type)
def locus_from_global_position(global_pos,
                               reference_genome: Union[str, ReferenceGenome] = 'default') -> LocusExpression:
    """Build a locus expression from a global position and a reference genome.
    The inverse of :meth:`.LocusExpression.global_position`.

    Examples
    --------

    >>> hl.eval(hl.locus_from_global_position(0))
    Locus(contig=1, position=1, reference_genome=GRCh37)

    >>> hl.eval(hl.locus_from_global_position(2824183054))
    Locus(contig=21, position=42584230, reference_genome=GRCh37)

    >>> hl.eval(hl.locus_from_global_position(2824183054, 'GRCh38'))
    Locus(contig=22, position=1, reference_genome=GRCh38)

    Parameters
    ----------
    global_pos : int or :class:`.Expression` of type :py:data:`.tint64`
        Global base position along the reference genome.
    reference_genome : :obj:`str` or :class:`.ReferenceGenome`
        Reference genome to use for converting the global position to a contig and local position.

    Returns
    -------
    :class:`.LocusExpression`
    """
    locus_type = tlocus(reference_genome)
    function_name = f'globalPosToLocus({reference_genome.name})'
    return _func(function_name, locus_type, global_pos)
@typecheck(s=expr_str,
           reference_genome=reference_genome_type)
def parse_locus(s, reference_genome: Union[str, ReferenceGenome] = 'default') -> LocusExpression:
    """Build a locus expression by parsing a string or string expression.

    Examples
    --------

    >>> hl.eval(hl.parse_locus("1:10000"))
    Locus(contig=1, position=10000, reference_genome=GRCh37)

    Notes
    -----
    The expected format is ``contig:position``, e.g. ``16:29500000`` or
    ``X:123456``.

    Parameters
    ----------
    s : str or :class:`.StringExpression`
        String to parse.
    reference_genome : :obj:`str` or :class:`.ReferenceGenome`
        Reference genome to use.

    Returns
    -------
    :class:`.LocusExpression`
    """
    function_name = f'Locus({reference_genome.name})'
    locus_type = tlocus(reference_genome)
    return _func(function_name, locus_type, s)
@typecheck(s=expr_str,
           reference_genome=reference_genome_type)
def parse_variant(s, reference_genome: Union[str, ReferenceGenome] = 'default') -> StructExpression:
    """Build a struct holding a locus and alleles by parsing a string.

    Examples
    --------

    >>> hl.eval(hl.parse_variant('1:100000:A:T,C'))
    Struct(locus=Locus('1', 100000), alleles=['A', 'T', 'C'])

    Notes
    -----
    The returned expression has type :class:`.tstruct` with the following
    fields:

     - `locus` (:class:`.tlocus`)
     - `alleles` (:class:`.tarray` of :py:data:`.tstr`)

    Parameters
    ----------
    s : :class:`.StringExpression`
        String to parse.
    reference_genome: :obj:`str` or :class:`.ReferenceGenome`
        Reference genome to use.

    Returns
    -------
    :class:`.StructExpression`
        Struct with fields `locus` and `alleles`.
    """
    variant_type = tstruct(locus=tlocus(reference_genome),
                           alleles=tarray(tstr))
    function_name = f'LocusAlleles({reference_genome.name})'
    return _func(function_name, variant_type, s)
@typecheck(gp=expr_array(expr_float64))
def gp_dosage(gp) -> Float64Expression:
    """
    Expected genotype dosage computed from an array of genotype probabilities.

    Examples
    --------

    >>> hl.eval(hl.gp_dosage([0.0, 0.5, 0.5]))
    1.5

    Notes
    -----
    Only bi-allelic variants are supported: `gp` must have length 3, and the
    value is ``gp[1] + 2 * gp[2]``.

    Parameters
    ----------
    gp : :class:`.ArrayFloat64Expression`
        Length 3 array of bi-allelic genotype probabilities

    Returns
    -------
    :class:`.Expression` of type :py:data:`.tfloat64`
    """
    dosage = _func("dosage", tfloat64, gp)
    return dosage
@typecheck(pl=expr_array(expr_int32))
def pl_dosage(pl) -> Float64Expression:
    r"""
    Expected genotype dosage computed from an array of Phred-scaled genotype
    likelihoods with uniform prior. Only defined for bi-allelic variants. The
    `pl` argument must be length 3.

    For a PL array ``[a, b, c]``, let:

    .. math::

        a^\prime = 10^{-a/10} \\
        b^\prime = 10^{-b/10} \\
        c^\prime = 10^{-c/10} \\

    The genotype dosage is given by:

    .. math::

        \frac{b^\prime + 2 c^\prime}
             {a^\prime + b^\prime +c ^\prime}

    Examples
    --------

    >>> hl.eval(hl.pl_dosage([5, 10, 100]))
    0.24025307377482674

    Parameters
    ----------
    pl : :class:`.ArrayNumericExpression` of type :py:data:`.tint32`
        Length 3 array of bi-allelic Phred-scaled genotype likelihoods

    Returns
    -------
    :class:`.Expression` of type :py:data:`.tfloat64`
    """
    probabilities = pl_to_gp(pl)
    # Weight each genotype probability by its alternate-allele count.
    weighted = probabilities * [0, 1, 2]
    return hl.sum(weighted, filter_missing=False)
@typecheck(pl=expr_array(expr_int32), _cache_size=int)
def pl_to_gp(pl, _cache_size=2048) -> ArrayNumericExpression:
    """
    Linear-scaled genotype probabilities computed from an array of Phred-scaled genotype likelihoods.

    Examples
    --------

    >>> hl.eval(hl.pl_to_gp([0, 10, 100]))
    [0.9090909090082644, 0.09090909090082644, 9.090909090082645e-11]

    Notes
    -----
    This function assumes a uniform prior on the possible genotypes.

    Values of ``10 ** (-x / 10)`` for ``x`` below `_cache_size` are read from
    a precomputed literal table; larger values are computed directly.

    Parameters
    ----------
    pl : :class:`.ArrayNumericExpression` of type :py:data:`.tint32`
        Array of Phred-scaled genotype likelihoods.

    Returns
    -------
    :class:`.ArrayNumericExpression` of type :py:data:`.tfloat64`
    """
    lookup = hl.literal([10 ** (-phred/10.0) for phred in builtins.range(_cache_size)])

    def to_linear(phred):
        # Use the table when the value is in range, else compute it.
        return hl.cond(phred >= _cache_size, 10 ** (-phred/10.0), lookup[phred])

    def normalize(values):
        return values / hl.sum(values)

    unnormalized = hl.bind(lambda pls: pls.map(to_linear), pl)
    return hl.bind(normalize, unnormalized)
@typecheck(start=expr_any, end=expr_any,
           includes_start=expr_bool, includes_end=expr_bool)
def interval(start,
             end,
             includes_start=True,
             includes_end=False) -> IntervalExpression:
    """Build an interval expression.

    Examples
    --------

    >>> hl.eval(hl.interval(5, 100))
    Interval(start=5, end=100)

    >>> hl.eval(hl.interval(hl.locus("1", 100), hl.locus("1", 1000)))
    Interval(start=Locus(contig=1, position=100, reference_genome=GRCh37),
             end=Locus(contig=1, position=1000, reference_genome=GRCh37))

    Notes
    -----
    `start` and `end` must have the same type.

    Parameters
    ----------
    start : :class:`.Expression`
        Start point.
    end : :class:`.Expression`
        End point.
    includes_start : :class:`.BooleanExpression`
        If ``True``, interval includes start point.
    includes_end : :class:`.BooleanExpression`
        If ``True``, interval includes end point.

    Returns
    -------
    :class:`.IntervalExpression`

    Raises
    ------
    TypeError
        If `start` and `end` have different types.
    """
    if start.dtype == end.dtype:
        point_type = start.dtype
        return _func('Interval', tinterval(point_type), start, end, includes_start, includes_end)

    raise TypeError("Type mismatch of start and end points: '{}', '{}'".format(start.dtype, end.dtype))
@typecheck(contig=expr_str, start=expr_int32,
           end=expr_int32, includes_start=expr_bool,
           includes_end=expr_bool, reference_genome=reference_genome_type)
def locus_interval(contig,
                   start,
                   end,
                   includes_start=True,
                   includes_end=False,
                   reference_genome: Union[str, ReferenceGenome] = 'default') -> IntervalExpression:
    """Build a locus interval expression.

    Examples
    --------

    >>> hl.eval(hl.locus_interval("1", 100, 1000))
    Interval(start=Locus(contig=1, position=100, reference_genome=GRCh37),
             end=Locus(contig=1, position=1000, reference_genome=GRCh37))

    Parameters
    ----------
    contig : :class:`.StringExpression`
        Contig name.
    start : :class:`.Int32Expression`
        Starting base position.
    end : :class:`.Int32Expression`
        End base position.
    includes_start : :class:`.BooleanExpression`
        If ``True``, interval includes start point.
    includes_end : :class:`.BooleanExpression`
        If ``True``, interval includes end point.
    reference_genome : :obj:`str` or :class:`.hail.genetics.ReferenceGenome`
        Reference genome to use.

    Returns
    -------
    :class:`.IntervalExpression`
    """
    interval_type = tinterval(tlocus(reference_genome))
    function_name = f'LocusInterval({reference_genome.name})'
    return _func(function_name, interval_type, contig, start, end, includes_start, includes_end)
@typecheck(s=expr_str,
           reference_genome=reference_genome_type)
def parse_locus_interval(s, reference_genome: Union[str, ReferenceGenome] = 'default') -> IntervalExpression:
    """Build a locus interval expression by parsing a string or string
    expression.

    Examples
    --------

    >>> hl.eval(hl.parse_locus_interval('1:1000-2000'))
    Interval(start=Locus(contig=1, position=1000, reference_genome=GRCh37),
             end=Locus(contig=1, position=2000, reference_genome=GRCh37))

    >>> hl.eval(hl.parse_locus_interval('1:start-10M'))
    Interval(start=Locus(contig=1, position=0, reference_genome=GRCh37),
             end=Locus(contig=1, position=10000000, reference_genome=GRCh37))

    Notes
    -----
    The start locus must precede the end locus. The default bounds of the
    interval are left-inclusive and right-exclusive. To change this, add
    one of ``[`` or ``(`` at the beginning of the string for left-inclusive
    or left-exclusive respectively. Likewise, add one of ``]`` or ``)`` at
    the end of the string for right-inclusive or right-exclusive
    respectively.

    There are several acceptable representations for `s`.

    ``CHR1:POS1-CHR2:POS2`` is the fully specified representation, and
    we use this to define the various shortcut representations.

    In a ``POS`` field, ``start`` (``Start``, ``START``) stands for 1.

    In a ``POS`` field, ``end`` (``End``, ``END``) stands for the contig length.

    In a ``POS`` field, the qualifiers ``m`` (``M``) and ``k`` (``K``) multiply
    the given number by ``1,000,000`` and ``1,000``, respectively.  ``1.6K`` is
    short for 1600, and ``29M`` is short for 29000000.

    ``CHR:POS1-POS2`` stands for ``CHR:POS1-CHR:POS2``

    ``CHR1-CHR2`` stands for ``CHR1:START-CHR2:END``

    ``CHR`` stands for ``CHR:START-CHR:END``

    Note
    ----
    The bounds of the interval must be valid loci for the reference genome
    (contig in reference genome and position is within the range [1-END])
    except in the case where the position is ``0`` **AND** the interval is
    **left-exclusive** which is normalized to be ``1`` and left-inclusive.
    Likewise, in the case where the position is ``END + 1`` **AND**
    the interval is **right-exclusive** which is normalized to be ``END``
    and right-inclusive.

    Parameters
    ----------
    s : str or :class:`.StringExpression`
        String to parse.
    reference_genome : :obj:`str` or :class:`.hail.genetics.ReferenceGenome`
        Reference genome to use.

    Returns
    -------
    :class:`.IntervalExpression`
    """
    function_name = f'LocusInterval({reference_genome.name})'
    interval_type = tinterval(tlocus(reference_genome))
    return _func(function_name, interval_type, s)
@typecheck(alleles=expr_int32,
           phased=expr_bool)
def call(*alleles, phased=False) -> CallExpression:
    """Build a call expression.

    Examples
    --------

    >>> hl.eval(hl.call(1, 0))
    Call(alleles=[1, 0], phased=False)

    Parameters
    ----------
    alleles : variable-length args of :obj:`int` or :class:`.Expression` of type :py:data:`.tint32`
        List of allele indices.
    phased : :obj:`bool`
        If ``True``, preserve the order of `alleles`.

    Returns
    -------
    :class:`.CallExpression`

    Raises
    ------
    NotImplementedError
        If more than 2 alleles are given.
    """
    n_alleles = builtins.len(alleles)
    if n_alleles > 2:
        raise NotImplementedError("'call' supports a maximum of 2 alleles.")
    call_args = (*alleles, phased)
    return _func('Call', tcall, *call_args)
@typecheck(gt_index=expr_int32)
def unphased_diploid_gt_index_call(gt_index) -> CallExpression:
    """Build an unphased, diploid call from a genotype index.

    Examples
    --------

    >>> hl.eval(hl.unphased_diploid_gt_index_call(4))
    Call(alleles=[1, 2], phased=False)

    Parameters
    ----------
    gt_index : :obj:`int` or :class:`.Expression` of type :py:data:`.tint32`
        Unphased, diploid genotype index.

    Returns
    -------
    :class:`.CallExpression`
    """
    index_expr = to_expr(gt_index)
    return _func('UnphasedDiploidGtIndexCall', tcall, index_expr)
@typecheck(s=expr_str)
def parse_call(s) -> CallExpression:
    """Build a call expression by parsing a string or string expression.

    Examples
    --------

    >>> hl.eval(hl.parse_call('0|2'))
    Call([0, 2], phased=True)

    Notes
    -----
    Strings are expected in the following format:

    +--------+-----------------+-----------------+
    | ploidy | Phased          | Unphased        |
    +========+=================+=================+
    |   0    | ``|-``          | ``-``           |
    +--------+-----------------+-----------------+
    |   1    | ``|i``          | ``i``           |
    +--------+-----------------+-----------------+
    |   2    | ``i|j``         | ``i/j``         |
    +--------+-----------------+-----------------+
    |   3    | ``i|j|k``       | ``i/j/k``       |
    +--------+-----------------+-----------------+
    |   N    | ``i|j|k|...|N`` | ``i/j/k/.../N`` |
    +--------+-----------------+-----------------+

    Parameters
    ----------
    s : str or :class:`.StringExpression`
        String to parse.

    Returns
    -------
    :class:`.CallExpression`
    """
    parsed = _func('Call', tcall, s)
    return parsed
@typecheck(expression=expr_any)
def is_defined(expression) -> BooleanExpression:
    """Test whether the argument is not missing.

    Examples
    --------

    >>> hl.eval(hl.is_defined(5))
    True

    >>> hl.eval(hl.is_defined(hl.null(hl.tstr)))
    False

    >>> hl.eval(hl.is_defined(hl.null(hl.tbool) & True))
    False

    Parameters
    ----------
    expression
        Expression to test.

    Returns
    -------
    :class:`.BooleanExpression`
        ``True`` if `expression` is not missing, ``False`` otherwise.
    """
    # Build the "is missing" test and negate it.
    missing = apply_expr(lambda ir: IsNA(ir), tbool, expression)
    return ~missing
@typecheck(expression=expr_any)
def is_missing(expression) -> BooleanExpression:
    """Test whether the argument is missing.

    Examples
    --------

    >>> hl.eval(hl.is_missing(5))
    False

    >>> hl.eval(hl.is_missing(hl.null(hl.tstr)))
    True

    >>> hl.eval(hl.is_missing(hl.null(hl.tbool) & True))
    True

    Parameters
    ----------
    expression
        Expression to test.

    Returns
    -------
    :class:`.BooleanExpression`
        ``True`` if `expression` is missing, ``False`` otherwise.
    """
    missing = apply_expr(lambda ir: IsNA(ir), tbool, expression)
    return missing
@typecheck(x=expr_oneof(expr_float32, expr_float64))
def is_nan(x) -> BooleanExpression:
    """Test whether the argument is ``nan`` (not a number).

    Examples
    --------

    >>> hl.eval(hl.is_nan(0))
    False

    >>> hl.eval(hl.is_nan(hl.literal(0) / 0))
    True

    >>> hl.eval(hl.is_nan(hl.literal(0) / hl.null(hl.tfloat64)))
    None

    Notes
    -----
    ``nan`` is a defined value, so :meth:`.is_missing` returns ``False`` on
    it. This method returns missing when `x` is missing.

    Parameters
    ----------
    x : float or :class:`.Expression` of type :py:data:`.tfloat32` or :py:data:`.tfloat64`
        Expression to test.

    Returns
    -------
    :class:`.BooleanExpression`
        ``True`` if `x` is ``nan``, ``False`` otherwise.
    """
    result = _func("isnan", tbool, x)
    return result
@typecheck(x=expr_oneof(expr_float32, expr_float64))
def is_finite(x) -> BooleanExpression:
    """Test whether a floating-point value is finite.

    Examples
    --------

    >>> hl.eval(hl.is_finite(0))
    True

    >>> hl.eval(hl.is_finite(float('nan')))
    False

    >>> hl.eval(hl.is_finite(float('inf')))
    False

    >>> hl.eval(hl.is_finite(hl.null('float32')))
    None

    Notes
    -----
    A missing `x` yields a missing result rather than ``True``.

    Parameters
    ----------
    x : float or :class:`.Expression` of type :py:data:`.tfloat32` or :py:data:`.tfloat64`
        Expression to test.

    Returns
    -------
    :class:`.BooleanExpression`
    """
    result = _func("is_finite", tbool, x)
    return result
@typecheck(x=expr_oneof(expr_float32, expr_float64))
def is_infinite(x) -> BooleanExpression:
    """Test whether a floating-point value is positive or negative infinity.

    Examples
    --------

    >>> hl.eval(hl.is_infinite(0))
    False

    >>> hl.eval(hl.is_infinite(float('nan')))
    False

    >>> hl.eval(hl.is_infinite(float('inf')))
    True

    >>> hl.eval(hl.is_infinite(hl.null('float32')))
    None

    Notes
    -----
    A missing `x` yields a missing result rather than ``False``.

    Parameters
    ----------
    x : float or :class:`.Expression` of type :py:data:`.tfloat32` or :py:data:`.tfloat64`
        Expression to test.

    Returns
    -------
    :class:`.BooleanExpression`
    """
    result = _func("is_infinite", tbool, x)
    return result
@typecheck(x=expr_any)
def json(x) -> StringExpression:
    """Serialize an expression to a JSON string expression.

    Examples
    --------

    >>> hl.eval(hl.json([1,2,3,4,5]))
    '[1,2,3,4,5]'

    >>> hl.eval(hl.json(hl.struct(a='Hello', b=0.12345, c=[1,2], d={'hi', 'bye'})))
    '{"a":"Hello","c":[1,2],"b":0.12345,"d":["bye","hi"]}'

    Parameters
    ----------
    x
        Expression to convert.

    Returns
    -------
    :class:`.StringExpression`
        String expression holding the JSON representation of `x`.
    """
    encoded = _func("json", tstr, x)
    return encoded
@typecheck(x=expr_float64, base=nullable(expr_float64))
def log(x, base=None) -> Float64Expression:
    """Compute the logarithm of `x`, optionally in a given `base`.

    Examples
    --------

    >>> hl.eval(hl.log(10))
    2.302585092994046

    >>> hl.eval(hl.log(10, 10))
    1.0

    >>> hl.eval(hl.log(1024, 2))
    10.0

    Notes
    -----
    When `base` is omitted, the natural logarithm is computed.

    Parameters
    ----------
    x : float or :class:`.Expression` of type :py:data:`.tfloat64`
        Value whose logarithm is taken.
    base : float or :class:`.Expression` of type :py:data:`.tfloat64`, optional
        Logarithm base.

    Returns
    -------
    :class:`.Expression` of type :py:data:`.tfloat64`
    """
    x = to_expr(x)
    # One-argument form: natural logarithm.
    if base is None:
        return _func("log", tfloat64, x)
    return _func("log", tfloat64, x, to_expr(base))
@typecheck(x=expr_float64)
def log10(x) -> Float64Expression:
    """Compute the base-10 logarithm of `x`.

    Examples
    --------

    >>> hl.eval(hl.log10(1000))
    3.0

    >>> hl.eval(hl.log10(0.0001123))
    -3.949620243738542

    Parameters
    ----------
    x : float or :class:`.Expression` of type :py:data:`.tfloat64`
        Value whose logarithm is taken.

    Returns
    -------
    :class:`.Expression` of type :py:data:`.tfloat64`
    """
    result = _func("log10", tfloat64, x)
    return result
@typecheck(args=expr_any)
def coalesce(*args):
    """Return the first of `args` that is not missing.

    Examples
    --------

    >>> x1 = hl.null('int')
    >>> x2 = 2
    >>> hl.eval(hl.coalesce(x1, x2))
    2

    Notes
    -----
    The arguments must share a type, or be convertible to a common type
    (all numeric, for instance). If every argument is missing, the result
    is missing.

    See Also
    --------
    :func:`.or_else`

    Parameters
    ----------
    args : variable-length args of :class:`.Expression`
        Candidate values, in priority order.

    Returns
    -------
    :class:`.Expression`

    Raises
    ------
    ValueError
        If no arguments are given.
    TypeError
        If the arguments cannot be unified to a common type.
    """
    if not args:
        raise ValueError("'coalesce' requires at least one expression argument")

    # unify_exprs returns the coerced expressions followed by a success flag.
    *exprs, success = unify_exprs(*args)
    if not success:
        arg_types = ''.join(f"\n argument {i}: type '{arg.dtype}'" for i, arg in enumerate(exprs))
        raise TypeError(f"'coalesce' requires all arguments to have the same type or compatible types"
                        f"{arg_types}")

    def first_defined(*bound):
        # Chain one `when` per candidate; fall through to missing.
        chain = case()
        for candidate in bound:
            chain = chain.when(hl.is_defined(candidate), candidate)
        return chain.or_missing()

    return bind(first_defined, *exprs)
@typecheck(a=expr_any, b=expr_any)
def or_else(a, b):
    """Return `a` unless it is missing, in which case return `b`.

    Examples
    --------

    >>> hl.eval(hl.or_else(5, 7))
    5

    >>> hl.eval(hl.or_else(hl.null(hl.tint32), 7))
    7

    See Also
    --------
    :func:`.coalesce`

    Parameters
    ----------
    a: :class:`.Expression`
        Preferred value.
    b: :class:`.Expression`
        Fallback used when `a` is missing.

    Returns
    -------
    :class:`.Expression`

    Raises
    ------
    TypeError
        If `a` and `b` cannot be unified to a common type.
    """
    a, b, unified = unify_exprs(a, b)
    if not unified:
        raise TypeError(f"'or_else' requires the 'a' and 'b' arguments to have the same type\n"
                        f" a: type '{a.dtype}'\n"
                        f" b: type '{b.dtype}'")
    # After successful unification both sides must have the same type.
    assert a.dtype == b.dtype
    a_is_present = hl.is_defined(a)
    return hl.cond(a_is_present, a, b)
@typecheck(predicate=expr_bool, value=expr_any)
def or_missing(predicate, value):
    """Return `value` if `predicate` is ``True``, otherwise return missing.

    Examples
    --------

    >>> hl.eval(hl.or_missing(True, 5))
    5

    >>> hl.eval(hl.or_missing(False, 5))
    None

    Parameters
    ----------
    predicate : :class:`.BooleanExpression`
        Condition selecting between `value` and missing.
    value : :class:`.Expression`
        Value to return if `predicate` is ``True``.

    Returns
    -------
    :class:`.Expression`
        This expression has the same type as `value`.
    """
    # The missing branch is typed like `value` so both branches agree.
    return hl.cond(predicate, value, hl.null(value.dtype))
@typecheck(x=expr_int32, n=expr_int32, p=expr_float64,
           alternative=enumeration("two.sided", "greater", "less"))
def binom_test(x, n, p, alternative: str) -> Float64Expression:
    """Perform a binomial test on `p` given `x` successes in `n` trials.

    Examples
    --------

    >>> hl.eval(hl.binom_test(5, 10, 0.5, 'less'))
    0.6230468749999999

    Notes
    -----
    Returns the p-value from the `exact binomial test
    <https://en.wikipedia.org/wiki/Binomial_test>`__ of the null hypothesis
    that success has probability `p`, given `x` successes in `n` trials.

    With alternative ``less``, the p-value is the probability of at most `x`
    successes, i.e. the cumulative probability at `x` of the distribution
    Binom(`n`, `p`). With ``greater``, the p-value is the probability of at
    least `x` successes. With ``two.sided``, the p-value is the total
    probability of all outcomes with probability at most that of `x`.

    Parameters
    ----------
    x : int or :class:`.Expression` of type :py:data:`.tint32`
        Number of successes.
    n : int or :class:`.Expression` of type :py:data:`.tint32`
        Number of trials.
    p : float or :class:`.Expression` of type :py:data:`.tfloat64`
        Probability of success, between 0 and 1.
    alternative : :obj:`str`
        One of "two.sided", "greater", "less".

    Returns
    -------
    :class:`.Expression` of type :py:data:`.tfloat64`
        p-value.
    """
    # Integer codes passed to the "binomTest" function for each alternative.
    alternative_codes = {"two.sided": 0, "greater": 1, "less": 2}
    alt_enum = alternative_codes[alternative]
    return _func("binomTest", tfloat64, x, n, p, to_expr(alt_enum))
@typecheck(x=expr_float64, df=expr_float64)
def pchisqtail(x, df) -> Float64Expression:
    """Right-tail probability at `x` of a chi-squared distribution with `df`
    degrees of freedom.

    Examples
    --------

    >>> hl.eval(hl.pchisqtail(5, 1))
    0.025347318677468304

    Parameters
    ----------
    x : float or :class:`.Expression` of type :py:data:`.tfloat64`
        Point at which the right tail starts.
    df : float or :class:`.Expression` of type :py:data:`.tfloat64`
        Degrees of freedom.

    Returns
    -------
    :class:`.Expression` of type :py:data:`.tfloat64`
    """
    tail = _func("pchisqtail", tfloat64, x, df)
    return tail
@typecheck(x=expr_float64)
def pnorm(x) -> Float64Expression:
    """The cumulative probability function of a standard normal distribution.

    Examples
    --------

    >>> hl.eval(hl.pnorm(0))
    0.5

    >>> hl.eval(hl.pnorm(1))
    0.8413447460685429

    >>> hl.eval(hl.pnorm(2))
    0.9772498680518208

    Notes
    -----
    Returns the left-tail probability `p` = Prob(:math:`Z < x`) with :math:`Z`
    a standard normal random variable.

    Parameters
    ----------
    x : float or :class:`.Expression` of type :py:data:`.tfloat64`
        Point at which the cumulative probability is evaluated.

    Returns
    -------
    :class:`.Expression` of type :py:data:`.tfloat64`
    """
    return _func("pnorm", tfloat64, x)
@typecheck(x=expr_float64, lamb=expr_float64, lower_tail=expr_bool, log_p=expr_bool)
def ppois(x, lamb, lower_tail=True, log_p=False) -> Float64Expression:
    r"""The cumulative probability function of a Poisson distribution.

    Examples
    --------

    >>> hl.eval(hl.ppois(2, 1))
    0.9196986029286058

    Notes
    -----
    If `lower_tail` is true, returns Prob(:math:`X \leq` `x`) where :math:`X` is a
    Poisson random variable with rate parameter `lamb`. If `lower_tail` is false,
    returns Prob(:math:`X` > `x`).

    Parameters
    ----------
    x : float or :class:`.Expression` of type :py:data:`.tfloat64`
        Point at which the cumulative probability is evaluated.
    lamb : float or :class:`.Expression` of type :py:data:`.tfloat64`
        Rate parameter of Poisson distribution.
    lower_tail : bool or :class:`.BooleanExpression`
        If ``True``, compute the probability of an outcome at or below `x`,
        otherwise greater than `x`.
    log_p : bool or :class:`.BooleanExpression`
        Return the natural logarithm of the probability.

    Returns
    -------
    :class:`.Expression` of type :py:data:`.tfloat64`
    """
    # The docstring is raw because it contains LaTeX backslashes (``\leq``).
    return _func("ppois", tfloat64, x, lamb, lower_tail, log_p)
@typecheck(p=expr_float64, df=expr_float64)
def qchisqtail(p, df) -> Float64Expression:
    """Inverse of :meth:`.pchisqtail`.

    Examples
    --------

    >>> hl.eval(hl.qchisqtail(0.01, 1))
    6.634896601021213

    Notes
    -----
    Returns right-quantile `x` for which `p` = Prob(:math:`Z^2` > x) with
    :math:`Z^2` a chi-squared random variable with degrees of freedom
    specified by `df`. `p` must satisfy 0 < `p` <= 1.

    Parameters
    ----------
    p : float or :class:`.Expression` of type :py:data:`.tfloat64`
        Probability.
    df : float or :class:`.Expression` of type :py:data:`.tfloat64`
        Degrees of freedom.

    Returns
    -------
    :class:`.Expression` of type :py:data:`.tfloat64`
    """
    quantile = _func("qchisqtail", tfloat64, p, df)
    return quantile
@typecheck(p=expr_float64)
def qnorm(p) -> Float64Expression:
    """Inverse of :meth:`.pnorm`.

    Examples
    --------

    >>> hl.eval(hl.qnorm(0.90))
    1.2815515655446008

    Notes
    -----
    Returns left-quantile `x` for which p = Prob(:math:`Z` < x) with :math:`Z`
    a standard normal random variable. `p` must satisfy 0 < `p` < 1.

    Parameters
    ----------
    p : float or :class:`.Expression` of type :py:data:`.tfloat64`
        Probability.

    Returns
    -------
    :class:`.Expression` of type :py:data:`.tfloat64`
    """
    quantile = _func("qnorm", tfloat64, p)
    return quantile
@typecheck(p=expr_float64, lamb=expr_float64, lower_tail=expr_bool, log_p=expr_bool)
def qpois(p, lamb, lower_tail=True, log_p=False) -> Int32Expression:
    r"""Inverts :meth:`.ppois`.

    Examples
    --------

    >>> hl.eval(hl.qpois(0.99, 1))
    4

    Notes
    -----
    Returns the smallest integer :math:`x` such that Prob(:math:`X \leq x`) :math:`\geq` `p` where :math:`X`
    is a Poisson random variable with rate parameter `lamb`.

    Parameters
    ----------
    p : float or :class:`.Expression` of type :py:data:`.tfloat64`
        Probability.
    lamb : float or :class:`.Expression` of type :py:data:`.tfloat64`
        Rate parameter of Poisson distribution.
    lower_tail : bool or :class:`.BooleanExpression`
        Corresponds to `lower_tail` parameter in inverse :meth:`.ppois`.
    log_p : bool or :class:`.BooleanExpression`
        Exponentiate `p` before testing.

    Returns
    -------
    :class:`.Expression` of type :py:data:`.tint32`
    """
    # The quantile is an integer count, so the expression is built as tint32.
    return _func("qpois", tint32, p, lamb, lower_tail, log_p)
@typecheck(start=expr_int32, stop=expr_int32, step=expr_int32)
def range(start, stop, step=1) -> ArrayNumericExpression:
    """Build an array of integers running from `start` to `stop` by `step`.

    Examples
    --------

    >>> hl.eval(hl.range(0, 10))
    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

    >>> hl.eval(hl.range(0, 10, step=3))
    [0, 3, 6, 9]

    Notes
    -----
    `start` is included in the result; `stop` is excluded.

    Parameters
    ----------
    start : int or :class:`.Expression` of type :py:data:`.tint32`
        Start of range.
    stop : int or :class:`.Expression` of type :py:data:`.tint32`
        End of range.
    step : int or :class:`.Expression` of type :py:data:`.tint32`
        Step of range.

    Returns
    -------
    :class:`.ArrayInt32Expression`
    """
    def build_ir(start_ir, stop_ir, step_ir):
        return ArrayRange(start_ir, stop_ir, step_ir)

    return apply_expr(build_ir, tarray(tint32), start, stop, step)
@typecheck(p=expr_float64, seed=nullable(int))
def rand_bool(p, seed=None) -> BooleanExpression:
    """Draw a random boolean that is ``True`` with probability `p`.

    Examples
    --------

    >>> hl.eval(hl.rand_bool(0.5))
    True

    >>> hl.eval(hl.rand_bool(0.5))
    False

    Parameters
    ----------
    p : :obj:`float` or :class:`.Float64Expression`
        Probability between 0 and 1.
    seed : :obj:`int`, optional
        Random seed.

    Returns
    -------
    :class:`.BooleanExpression`
    """
    draw = _seeded_func("rand_bool", tbool, seed, p)
    return draw
@typecheck(mean=expr_float64, sd=expr_float64, seed=nullable(int))
def rand_norm(mean=0, sd=1, seed=None) -> Float64Expression:
    """Draw a sample from a normal distribution with mean `mean` and
    standard deviation `sd`.

    Examples
    --------

    >>> hl.eval(hl.rand_norm())
    1.5388475315213386

    >>> hl.eval(hl.rand_norm())
    -0.3006188509144124

    Parameters
    ----------
    mean : :obj:`float` or :class:`.Float64Expression`
        Mean of normal distribution.
    sd : :obj:`float` or :class:`.Float64Expression`
        Standard deviation of normal distribution.
    seed : :obj:`int`, optional
        Random seed.

    Returns
    -------
    :class:`.Float64Expression`
    """
    draw = _seeded_func("rand_norm", tfloat64, seed, mean, sd)
    return draw
@typecheck(lamb=expr_float64, seed=nullable(int))
def rand_pois(lamb, seed=None) -> Float64Expression:
    """Draw a sample from a `Poisson distribution
    <https://en.wikipedia.org/wiki/Poisson_distribution>`__ with rate
    parameter `lamb`.

    Examples
    --------

    >>> hl.eval(hl.rand_pois(1))
    2.0

    >>> hl.eval(hl.rand_pois(1))
    3.0

    Parameters
    ----------
    lamb : :obj:`float` or :class:`.Float64Expression`
        Rate parameter for Poisson distribution.
    seed : :obj:`int`, optional
        Random seed.

    Returns
    -------
    :class:`.Float64Expression`
    """
    draw = _seeded_func("rand_pois", tfloat64, seed, lamb)
    return draw
@typecheck(lower=expr_float64, upper=expr_float64, seed=nullable(int))
def rand_unif(lower, upper, seed=None) -> Float64Expression:
    """Draw a sample from a uniform distribution on the interval
    [`lower`, `upper`].

    Examples
    --------

    >>> hl.eval(hl.rand_unif(0, 1))
    0.7983073825816226

    >>> hl.eval(hl.rand_unif(0, 1))
    0.5161799497741769

    Parameters
    ----------
    lower : :obj:`float` or :class:`.Float64Expression`
        Left boundary of range.
    upper : :obj:`float` or :class:`.Float64Expression`
        Right boundary of range.
    seed : :obj:`int`, optional
        Random seed.

    Returns
    -------
    :class:`.Float64Expression`
    """
    draw = _seeded_func("rand_unif", tfloat64, seed, lower, upper)
    return draw
@typecheck(a=expr_float64,
           b=expr_float64,
           lower=nullable(expr_float64),
           upper=nullable(expr_float64),
           seed=nullable(int))
def rand_beta(a, b, lower=None, upper=None, seed=None) -> Float64Expression:
    """Samples from a `beta distribution
    <https://en.wikipedia.org/wiki/Beta_distribution>`__ with parameters `a`
    (alpha) and `b` (beta).

    Notes
    -----
    The optional parameters `lower` and `upper` represent a truncated beta
    distribution with parameters a and b and support `[lower, upper]`. Draws are
    made via rejection sampling, i.e. returning the first draw from Beta(a,b)
    that falls in range `[lower, upper]`. This procedure may be slow if the
    probability mass of Beta(a,b) over `[lower, upper]` is small.

    If only one of `lower` and `upper` is given, the other defaults to the
    corresponding end of the unit interval (0 or 1).

    Examples
    --------

    >>> hl.eval(hl.rand_beta(0, 1))
    0.6696807666871818

    >>> hl.eval(hl.rand_beta(0, 1))
    0.8512985039011525

    Parameters
    ----------
    a : :obj:`float` or :class:`.Float64Expression`
    b : :obj:`float` or :class:`.Float64Expression`
    lower : :obj:`float` or :class:`.Float64Expression`, optional
        Lower boundary of truncated beta distribution.
    upper : :obj:`float` or :class:`.Float64Expression`, optional
        Upper boundary of truncated beta distribution.
    seed : :obj:`int`, optional
        Random seed.

    Returns
    -------
    :class:`.Float64Expression`
    """
    # No truncation requested: use the two-argument form.
    if lower is None and upper is None:
        return _seeded_func("rand_beta", tfloat64, seed, a, b)

    # Caller-supplied bounds are declared as float64 expressions by the
    # decorator; build the defaults as float64 literals too so that all four
    # arguments passed to the function share one type.
    if lower is None:
        lower = hl.literal(0.0)
    if upper is None:
        upper = hl.literal(1.0)
    return _seeded_func("rand_beta", tfloat64, seed, a, b, lower, upper)
@typecheck(shape=expr_float64,
           scale=expr_float64,
           seed=nullable(int))
def rand_gamma(shape, scale, seed=None) -> Float64Expression:
    """Draw a sample from a `gamma distribution
    <https://en.wikipedia.org/wiki/Gamma_distribution>`__
    with parameters `shape` and `scale`.

    Examples
    --------

    >>> hl.eval(hl.rand_gamma(1, 1))
    2.3915947710237537

    >>> hl.eval(hl.rand_gamma(1, 1))
    0.1339939936379711

    Parameters
    ----------
    shape : :obj:`float` or :class:`.Float64Expression`
    scale : :obj:`float` or :class:`.Float64Expression`
    seed : :obj:`int`, optional
        Random seed.

    Returns
    -------
    :class:`.Float64Expression`
    """
    draw = _seeded_func("rand_gamma", tfloat64, seed, shape, scale)
    return draw
@typecheck(prob=expr_array(expr_float64),
           seed=nullable(int))
def rand_cat(prob, seed=None) -> Int32Expression:
    """Draw a sample from a `categorical distribution
    <https://en.wikipedia.org/wiki/Categorical_distribution>`__.

    Notes
    -----
    `prob` is an unnormalized probability mass function whose indices are
    the categories. Index ``i`` is drawn with probability
    ``prob[i]/sum(prob)``.

    Warning
    -------
    This function may be slow when the number of categories is large.

    Examples
    --------

    >>> hl.eval(hl.rand_cat([0, 1.7, 2]))
    2

    >>> hl.eval(hl.rand_cat([0, 1.7, 2]))
    1

    Parameters
    ----------
    prob : :obj:`list` of float or :class:`.ArrayExpression` of type :py:data:`.tfloat64`
    seed : :obj:`int` or `None`
        If not `None`, function will be seeded with provided seed.

    Returns
    -------
    :class:`.Int32Expression`
    """
    draw = _seeded_func("rand_cat", tint32, seed, prob)
    return draw
@typecheck(a=expr_array(expr_float64),
           seed=nullable(int))
def rand_dirichlet(a, seed=None) -> ArrayExpression:
    """Samples from a `Dirichlet distribution
    <https://en.wikipedia.org/wiki/Dirichlet_distribution>`__.

    Examples
    --------

    >>> hl.eval(hl.rand_dirichlet([1, 1, 1]))
    [0.4630197581640282,0.18207753442497876,0.3549027074109931]

    >>> hl.eval(hl.rand_dirichlet([1, 1, 1]))
    [0.20851948405364765,0.7873859423649898,0.004094573581362475]

    Notes
    -----
    Each non-zero concentration parameter ``p`` is replaced by a draw from
    ``rand_gamma(p, 1)`` (zeros stay zero), and the resulting array is
    divided by its sum.

    Parameters
    ----------
    a : :obj:`list` of float or :class:`.ArrayExpression` of type :py:data:`.tfloat64`
        Array of non-negative concentration parameters.
    seed : :obj:`int`, optional
        Random seed.

    Returns
    -------
    :class:`.ArrayExpression` of type :py:data:`.tfloat64`
        Array with one entry per element of `a`.
    """
    def gamma_draw(p):
        # A zero concentration parameter contributes exactly zero.
        return hl.cond(p == 0.0,
                       0.0,
                       hl.rand_gamma(p, 1, seed=seed))

    def normalize(x):
        return x / hl.sum(x)

    # Bind the draws once so the same values feed both numerator and sum.
    return hl.bind(normalize, a.map(gamma_draw))
@typecheck(x=expr_float64)
def sqrt(x) -> Float64Expression:
    """Compute the square root of `x`.

    Examples
    --------

    >>> hl.eval(hl.sqrt(3))
    1.7320508075688772

    Notes
    -----
    Expressions can also be exponentiated with standard Python syntax,
    e.g. ``x ** 0.5``.

    Parameters
    ----------
    x : float or :class:`.Expression` of type :py:data:`.tfloat64`

    Returns
    -------
    :class:`.Expression` of type :py:data:`.tfloat64`
    """
    root = _func("sqrt", tfloat64, x)
    return root
@typecheck(x=expr_array(expr_float64), y=expr_array(expr_float64))
def corr(x, y) -> Float64Expression:
    """Compute the
    `Pearson correlation coefficient <https://en.wikipedia.org/wiki/Pearson_correlation_coefficient>`__
    between `x` and `y`.

    Examples
    --------

    >>> hl.eval(hl.corr([1, 2, 4], [2, 3, 1]))
    -0.6546536707079772

    Notes
    -----
    Only indices at which both `x` and `y` are non-missing contribute to
    the calculation.

    If `x` and `y` have length zero, then the result is missing.

    Parameters
    ----------
    x : :class:`.Expression` of type ``array<tfloat64>``
    y : :class:`.Expression` of type ``array<tfloat64>``

    Returns
    -------
    :class:`.Float64Expression`
    """
    coefficient = _func("corr", tfloat64, x, y)
    return coefficient
# Alleles made up only of the base letters A, C, G, T, N, M.
_base_regex = "^([ACGTNM])+$"
# Markers of symbolic alleles: leading/trailing '.', angle brackets, or
# square brackets anywhere.
_symbolic_regex = r"(^\.)|(\.$)|(^<)|(>$)|(\[)|(\])"

# Allele type names; a name's position in this list is its integer code.
_allele_types = ["Unknown", "SNP", "MNP", "Insertion", "Deletion", "Complex", "Star", "Symbolic"]
# code -> name
_allele_enum = {code: label for code, label in enumerate(_allele_types)}
# name -> code
_allele_ints = {label: code for code, label in _allele_enum.items()}
@typecheck(ref=expr_str, alt=expr_str)
def _num_allele_type(ref, alt) -> Int32Expression:
    """Classify a (ref, alt) allele pair as an integer code from ``_allele_ints``.

    The classification is:

    - `ref` not composed of base letters: Unknown.
    - `alt` composed of base letters:
        - equal lengths: SNP if exactly one position differs (length-1
          identical alleles are Unknown), otherwise MNP;
        - `alt` longer, same first base, and `alt` ends with the rest of
          `ref`: Insertion;
        - same first base and `ref` ends with the rest of `alt`: Deletion;
        - anything else: Complex.
    - `alt` equal to ``'*'``: Star.
    - `alt` matching the symbolic pattern: Symbolic.
    - otherwise: Unknown.
    """
    def classify(r, a):
        # Both alleles are plain bases and have the same length.
        same_length = hl.cond(r.length() == 1,
                              hl.cond(r != a, _allele_ints['SNP'], _allele_ints['Unknown']),
                              hl.cond(hamming(r, a) == 1,
                                      _allele_ints['SNP'],
                                      _allele_ints['MNP']))

        # Both alleles are plain bases; branch order matters.
        both_bases = (hl.case()
                      .when(r.length() == a.length(), same_length)
                      .when((r.length() < a.length()) & (r[0] == a[0]) & a.endswith(r[1:]),
                            _allele_ints["Insertion"])
                      .when((r[0] == a[0]) & r.endswith(a[1:]),
                            _allele_ints["Deletion"])
                      .default(_allele_ints['Complex']))

        # Ref is plain bases; alt may be bases, star, or symbolic.
        ref_is_bases = (hl.case()
                        .when(a.matches(_base_regex), both_bases)
                        .when(a == '*', _allele_ints['Star'])
                        .when(a.matches(_symbolic_regex), _allele_ints['Symbolic'])
                        .default(_allele_ints['Unknown']))

        return hl.cond(r.matches(_base_regex), ref_is_bases, _allele_ints['Unknown'])

    return hl.bind(classify, ref, alt)
@typecheck(ref=expr_str, alt=expr_str)
def is_snp(ref, alt) -> BooleanExpression:
    """Test whether the alleles constitute a single nucleotide polymorphism.

    Examples
    --------

    >>> hl.eval(hl.is_snp('A', 'T'))
    True

    Parameters
    ----------
    ref : :class:`.StringExpression`
        Reference allele.
    alt : :class:`.StringExpression`
        Alternate allele.

    Returns
    -------
    :class:`.BooleanExpression`
    """
    type_code = _num_allele_type(ref, alt)
    return type_code == _allele_ints["SNP"]
@typecheck(ref=expr_str, alt=expr_str)
def is_mnp(ref, alt) -> BooleanExpression:
    """Test whether the alleles constitute a multiple nucleotide polymorphism.

    Examples
    --------

    >>> hl.eval(hl.is_mnp('AA', 'GT'))
    True

    Parameters
    ----------
    ref : :class:`.StringExpression`
        Reference allele.
    alt : :class:`.StringExpression`
        Alternate allele.

    Returns
    -------
    :class:`.BooleanExpression`
    """
    type_code = _num_allele_type(ref, alt)
    return type_code == _allele_ints["MNP"]
@typecheck(ref=expr_str, alt=expr_str)
def is_transition(ref, alt) -> BooleanExpression:
    """Test whether the alleles constitute a transition.

    Examples
    --------

    >>> hl.eval(hl.is_transition('A', 'T'))
    False

    >>> hl.eval(hl.is_transition('AAA', 'AGA'))
    True

    Parameters
    ----------
    ref : :class:`.StringExpression`
        Reference allele.
    alt : :class:`.StringExpression`
        Alternate allele.

    Returns
    -------
    :class:`.BooleanExpression`
    """
    # A transition is a SNP whose differing base is A<->G or C<->T.
    snp = is_snp(ref, alt)
    transition = _is_snp_transition(ref, alt)
    return snp & transition
@typecheck(ref=expr_str, alt=expr_str)
def is_transversion(ref, alt) -> BooleanExpression:
    """Test whether the alleles constitute a transversion.

    Examples
    --------

    >>> hl.eval(hl.is_transversion('A', 'T'))
    True

    >>> hl.eval(hl.is_transversion('AAA', 'AGA'))
    False

    Parameters
    ----------
    ref : :class:`.StringExpression`
        Reference allele.
    alt : :class:`.StringExpression`
        Alternate allele.

    Returns
    -------
    :class:`.BooleanExpression`
    """
    # A transversion is a SNP that is not a transition.
    snp = is_snp(ref, alt)
    not_transition = ~(_is_snp_transition(ref, alt))
    return snp & not_transition
@typecheck(ref=expr_str, alt=expr_str)
def _is_snp_transition(ref, alt) -> BooleanExpression:
    """Test whether any position of `ref`/`alt` differs by a transition.

    A position counts when the bases differ and the (ref, alt) bases are one
    of A/G, G/A, C/T, T/C. Positions are the indices ``0 .. ref.length()``.
    """
    transition_pairs = (('A', 'G'), ('G', 'A'), ('C', 'T'), ('T', 'C'))

    def transition_at(i):
        # OR together the four base-pair tests, left to right.
        matches_pair = None
        for ref_base, alt_base in transition_pairs:
            this_pair = (ref[i] == ref_base) & (alt[i] == alt_base)
            matches_pair = this_pair if matches_pair is None else matches_pair | this_pair
        return (ref[i] != alt[i]) & matches_pair

    positions = hl.range(0, ref.length())
    return hl.any(transition_at, positions)
@typecheck(ref=expr_str, alt=expr_str)
def is_insertion(ref, alt) -> BooleanExpression:
    """Test whether the alleles constitute an insertion.

    Examples
    --------

    >>> hl.eval(hl.is_insertion('A', 'ATT'))
    True

    Parameters
    ----------
    ref : :class:`.StringExpression`
        Reference allele.
    alt : :class:`.StringExpression`
        Alternate allele.

    Returns
    -------
    :class:`.BooleanExpression`
    """
    type_code = _num_allele_type(ref, alt)
    return type_code == _allele_ints["Insertion"]
@typecheck(ref=expr_str, alt=expr_str)
def is_deletion(ref, alt) -> BooleanExpression:
    """Test whether the alleles constitute a deletion.

    Examples
    --------

    >>> hl.eval(hl.is_deletion('ATT', 'A'))
    True

    Parameters
    ----------
    ref : :class:`.StringExpression`
        Reference allele.
    alt : :class:`.StringExpression`
        Alternate allele.

    Returns
    -------
    :class:`.BooleanExpression`
    """
    type_code = _num_allele_type(ref, alt)
    return type_code == _allele_ints["Deletion"]
@typecheck(ref=expr_str, alt=expr_str)
def is_indel(ref, alt) -> BooleanExpression:
    """Test whether the alleles constitute an insertion or a deletion.

    Examples
    --------

    >>> hl.eval(hl.is_indel('ATT', 'A'))
    True

    Parameters
    ----------
    ref : :class:`.StringExpression`
        Reference allele.
    alt : :class:`.StringExpression`
        Alternate allele.

    Returns
    -------
    :class:`.BooleanExpression`
    """
    def insertion_or_deletion(type_code):
        return ((type_code == _allele_ints["Insertion"]) |
                (type_code == _allele_ints["Deletion"]))

    # Bind the type code once so it is not recomputed for each comparison.
    return hl.bind(insertion_or_deletion, _num_allele_type(ref, alt))
@typecheck(ref=expr_str, alt=expr_str)
def is_star(ref, alt) -> BooleanExpression:
    """Returns ``True`` if the alleles constitute an upstream deletion.

    Examples
    --------

    >>> hl.eval(hl.is_star('A', '*'))
    True

    Parameters
    ----------
    ref : :class:`.StringExpression`
        Reference allele.
    alt : :class:`.StringExpression`
        Alternate allele.

    Returns
    -------
    :class:`.BooleanExpression`
    """
    return _num_allele_type(ref, alt) == _allele_ints["Star"]
@typecheck(ref=expr_str, alt=expr_str)
def is_complex(ref, alt) -> BooleanExpression:
    """Returns ``True`` if the alleles constitute a complex polymorphism.

    Examples
    --------

    >>> hl.eval(hl.is_complex('ATT', 'GCAC'))
    True

    Notes
    -----
    Alleles of equal length are never complex: they are classified as SNP or
    MNP instead.

    Parameters
    ----------
    ref : :class:`.StringExpression`
        Reference allele.
    alt : :class:`.StringExpression`
        Alternate allele.

    Returns
    -------
    :class:`.BooleanExpression`
    """
    return _num_allele_type(ref, alt) == _allele_ints["Complex"]
@typecheck(ref=expr_str, alt=expr_str)
def is_strand_ambiguous(ref, alt) -> BooleanExpression:
    """Test whether the alleles are strand ambiguous.

    The strand ambiguous allele pairs are ``A/T``, ``T/A``, ``C/G``, and
    ``G/C``, written as `ref`/`alt`.

    Examples
    --------

    >>> hl.eval(hl.is_strand_ambiguous('A', 'T'))
    True

    Parameters
    ----------
    ref : :class:`.StringExpression`
        Reference allele.
    alt : :class:`.StringExpression`
        Alternate allele.

    Returns
    -------
    :class:`.BooleanExpression`
    """
    ambiguous_pairs = hl.literal({('A', 'T'), ('T', 'A'), ('G', 'C'), ('C', 'G')})
    pair = (ref, alt)
    return ambiguous_pairs.contains(pair)
@typecheck(ref=expr_str, alt=expr_str)
def allele_type(ref, alt) -> StringExpression:
    """Name the type of the polymorphism as a string.

    Examples
    --------

    >>> hl.eval(hl.allele_type('A', 'T'))
    'SNP'

    >>> hl.eval(hl.allele_type('ATT', 'A'))
    'Deletion'

    Notes
    -----
    The possible return values are:
     - ``"SNP"``
     - ``"MNP"``
     - ``"Insertion"``
     - ``"Deletion"``
     - ``"Complex"``
     - ``"Star"``
     - ``"Symbolic"``
     - ``"Unknown"``

    Parameters
    ----------
    ref : :class:`.StringExpression`
        Reference allele.
    alt : :class:`.StringExpression`
        Alternate allele.

    Returns
    -------
    :class:`.StringExpression`
    """
    # The integer type code doubles as an index into the list of names.
    type_names = hl.literal(_allele_types)
    type_code = _num_allele_type(ref, alt)
    return type_names[type_code]
@typecheck(s1=expr_str, s2=expr_str)
def hamming(s1, s2) -> Int32Expression:
    """Compute the Hamming distance between two strings.

    Examples
    --------

    >>> hl.eval(hl.hamming('ATATA', 'ATGCA'))
    2

    >>> hl.eval(hl.hamming('abcdefg', 'zzcdefz'))
    3

    Notes
    -----
    This method will fail if the two strings have different length.

    Parameters
    ----------
    s1 : :class:`.StringExpression`
        First string.
    s2 : :class:`.StringExpression`
        Second string.

    Returns
    -------
    :class:`.Expression` of type :py:data:`.tint32`
    """
    distance = _func("hamming", tint32, s1, s2)
    return distance
@typecheck(s=expr_str)
def entropy(s) -> Float64Expression:
    r"""Returns the `Shannon entropy <https://en.wikipedia.org/wiki/Entropy_(information_theory)>`__
    of the character distribution defined by the string.

    Examples
    --------

    >>> hl.eval(hl.entropy('ac'))
    1.0

    >>> hl.eval(hl.entropy('accctg'))
    1.79248

    Notes
    -----
    For a string of length :math:`n` with :math:`k` unique characters
    :math:`\{ c_1, \dots, c_k \}`, let :math:`p_i` be the probability that
    a randomly chosen character is :math:`c_i`, e.g. the number of instances
    of :math:`c_i` divided by :math:`n`. Then the base-2 Shannon entropy is
    given by

    .. math::

        H = -\sum_{i=1}^k p_i \log_2(p_i).

    Parameters
    ----------
    s : :class:`.StringExpression`

    Returns
    -------
    :class:`.Expression` of type :py:data:`.tfloat64`
    """
    return _func("entropy", tfloat64, s)
@typecheck(x=expr_any)
def str(x) -> StringExpression:
    """Convert `x` to its string representation.

    Examples
    --------

    >>> hl.eval(hl.str(hl.struct(a=5, b=7)))
    '{"a": 5, "b": 7}'

    Parameters
    ----------
    x
        Expression to convert.

    Returns
    -------
    :class:`.StringExpression`
    """
    # A string expression is returned unchanged; anything else is converted.
    return x if x.dtype == tstr else _func("str", tstr, x)
@typecheck(c=expr_call, i=expr_int32)
def downcode(c, i) -> CallExpression:
    """Build a new call in which every allele other than `i` is set to ref.

    Examples
    --------
    Preserve the third allele and downcode all other alleles to reference.

    >>> hl.eval(hl.downcode(hl.call(1, 2), 2))
    Call(alleles=[0, 2], phased=False)

    Parameters
    ----------
    c : :class:`.CallExpression`
        A call.
    i : :class:`.Expression` of type :py:data:`.tint32`
        The index of the allele that will be sent to the alternate allele. All
        other alleles will be downcoded to reference.

    Returns
    -------
    :class:`.CallExpression`
    """
    downcoded = _func("downcode", tcall, c, i)
    return downcoded
@typecheck(pl=expr_array(expr_int32))
def gq_from_pl(pl) -> Int32Expression:
    """Derive genotype quality from Phred-scaled probability likelihoods.

    Examples
    --------

    >>> hl.eval(hl.gq_from_pl([0, 69, 1035]))
    69

    Parameters
    ----------
    pl : :class:`.ArrayInt32Expression`
        Phred-scaled likelihoods.

    Returns
    -------
    :class:`.Expression` of type :py:data:`.tint32`
    """
    quality = _func("gqFromPL", tint32, pl)
    return quality
@typecheck(n=expr_int32)
def triangle(n) -> Int32Expression:
    """Compute the triangle number of `n`.

    Examples
    --------

    >>> hl.eval(hl.triangle(3))
    6

    Notes
    -----
    The calculation is ``n * (n + 1) / 2``.

    Parameters
    ----------
    n : :class:`.Expression` of type :py:data:`.tint32`

    Returns
    -------
    :class:`.Expression` of type :py:data:`.tint32`
    """
    result = _func("triangle", tint32, n)
    return result
@typecheck(f=func_spec(1, expr_bool),
           collection=expr_oneof(expr_set(), expr_array()))
def filter(f: Callable, collection):
    """Keep only the elements of a collection for which `f` returns ``True``.

    Examples
    --------

    >>> a = [1, 2, 3, 4]
    >>> s = {'Alice', 'Bob', 'Charlie'}

    >>> hl.eval(hl.filter(lambda x: x % 2 == 0, a))
    [2, 4]

    >>> hl.eval(hl.filter(lambda x: ~(x[-1] == 'e'), s))
    {'Bob'}

    Notes
    -----
    The result has the same kind as the input: filtering a
    :class:`.SetExpression` gives a :class:`.SetExpression`, and filtering an
    :class:`.ArrayExpression` gives an :class:`.ArrayExpression`.

    Parameters
    ----------
    f : function ( (arg) -> :class:`.BooleanExpression`)
        Function to evaluate for each element of the collection. Must return a
        :class:`.BooleanExpression`.
    collection : :class:`.ArrayExpression` or :class:`.SetExpression`.
        Array or set expression to filter.

    Returns
    -------
    :class:`.ArrayExpression` or :class:`.SetExpression`
        Expression of the same type as `collection`.
    """
    # Delegates to the collection's own `filter` method.
    kept = collection.filter(f)
    return kept
@typecheck(f=func_spec(1, expr_bool),
           collection=expr_oneof(expr_set(), expr_array()))
def any(f: Callable, collection) -> BooleanExpression:
    """Test whether `f` returns ``True`` for at least one element.

    Examples
    --------

    >>> a = ['The', 'quick', 'brown', 'fox']
    >>> s = {1, 3, 5, 6, 7, 9}

    >>> hl.eval(hl.any(lambda x: x[-1] == 'x', a))
    True

    >>> hl.eval(hl.any(lambda x: x % 4 == 0, s))
    False

    Notes
    -----
    This method returns ``False`` for empty collections.

    Parameters
    ----------
    f : function ( (arg) -> :class:`.BooleanExpression`)
        Function to evaluate for each element of the collection. Must return a
        :class:`.BooleanExpression`.
    collection : :class:`.ArrayExpression` or :class:`.SetExpression`
        Collection expression.

    Returns
    -------
    :class:`.BooleanExpression`.
        ``True`` if `f` returns ``True`` for any element, ``False`` otherwise.
    """
    # Delegates to the collection's own `any` method.
    found = collection.any(f)
    return found
@typecheck(f=func_spec(1, expr_bool),
           collection=expr_oneof(expr_set(), expr_array()))
def all(f: Callable, collection) -> BooleanExpression:
    """Test whether `f` returns ``True`` for every element.

    Examples
    --------

    >>> a = ['The', 'quick', 'brown', 'fox']
    >>> s = {1, 3, 5, 6, 7, 9}

    >>> hl.eval(hl.all(lambda x: hl.len(x) > 3, a))
    False

    >>> hl.eval(hl.all(lambda x: x < 10, s))
    True

    Notes
    -----
    This method returns ``True`` if the collection is empty.

    Parameters
    ----------
    f : function ( (arg) -> :class:`.BooleanExpression`)
        Function to evaluate for each element of the collection. Must return a
        :class:`.BooleanExpression`.
    collection : :class:`.ArrayExpression` or :class:`.SetExpression`
        Collection expression.

    Returns
    -------
    :class:`.BooleanExpression`.
        ``True`` if `f` returns ``True`` for every element, ``False`` otherwise.
    """
    # Delegates to the collection's own `all` method.
    holds = collection.all(f)
    return holds
@typecheck(f=func_spec(1, expr_bool),
           collection=expr_oneof(expr_set(), expr_array()))
def find(f: Callable, collection):
    """Return the first element for which `f` returns ``True``.

    Examples
    --------

    >>> a = ['The', 'quick', 'brown', 'fox']
    >>> s = {1, 3, 5, 6, 7, 9}

    >>> hl.eval(hl.find(lambda x: x[-1] == 'x', a))
    'fox'

    >>> hl.eval(hl.find(lambda x: x % 4 == 0, s))
    None

    Notes
    -----
    If `f` returns ``False`` for every element, then the result is missing.

    Sets are unordered. If `collection` is of type :class:`.tset`, then the
    element returned comes from no guaranteed ordering.

    Parameters
    ----------
    f : function ( (arg) -> :class:`.BooleanExpression`)
        Function to evaluate for each element of the collection. Must return a
        :class:`.BooleanExpression`.
    collection : :class:`.ArrayExpression` or :class:`.SetExpression`
        Collection expression.

    Returns
    -------
    :class:`.Expression`
        Expression whose type is the element type of the collection.
    """
    # Delegates to the collection's own `find` method.
    match = collection.find(f)
    return match
@typecheck(f=func_spec(1, expr_any),
           collection=expr_oneof(expr_set(), expr_array()))
def flatmap(f: Callable, collection):
    """Map each element of the collection to a new collection, and flatten the results.

    Examples
    --------

    >>> a = [[0, 1], [1, 2], [4, 5, 6, 7]]

    >>> hl.eval(hl.flatmap(lambda x: x[1:], a))
    [1, 2, 5, 6, 7]

    Parameters
    ----------
    f : function ( (arg) -> :class:`.CollectionExpression`)
        Function from the element type of the collection to the type of the
        collection. For instance, `flatmap` on a ``set<str>`` should take
        a ``str`` and return a ``set``.
    collection : :class:`.ArrayExpression` or :class:`.SetExpression`
        Collection expression.

    Returns
    -------
    :class:`.ArrayExpression` or :class:`.SetExpression`
    """
    # This function performs no validation of `f`'s return type itself; the
    # call is delegated unchanged to the collection's ``flatmap`` method.
    return collection.flatmap(f)
@typecheck(f=func_spec(1, expr_any),
           collection=expr_oneof(expr_set(), expr_array()))
def group_by(f: Callable, collection) -> DictExpression:
    """Bucket the elements of a collection into a dict keyed by `f`.

    Examples
    --------

    >>> a = ['The', 'quick', 'brown', 'fox']

    >>> hl.eval(hl.group_by(lambda x: hl.len(x), a))
    {5: ['quick', 'brown'], 3: ['The', 'fox']}

    Parameters
    ----------
    f : function ( (arg) -> :class:`.Expression`)
        Function evaluated on each element of the collection to produce the
        key under which that element is grouped.
    collection : :class:`.ArrayExpression` or :class:`.SetExpression`
        Collection expression.

    Returns
    -------
    :class:`.DictExpression`.
        Dictionary keyed by results of `f`.
    """
    # Thin functional-style wrapper around the collection's ``group_by``.
    grouped = collection.group_by(f)
    return grouped
@typecheck(f=func_spec(2, expr_any),
           zero=expr_any,
           collection=expr_oneof(expr_set(), expr_array()))
def fold(f: Callable, zero, collection) -> Expression:
    """Reduce a collection to a single value using `f`, starting from `zero`.

    Examples
    --------

    >>> a = [0, 1, 2]

    >>> hl.eval(hl.fold(lambda i, j: i + j, 0, a))
    3

    Parameters
    ----------
    f : function ( (:class:`.Expression`, :class:`.Expression`) -> :class:`.Expression`)
        Function which takes the cumulative value and the next element, and
        returns a new value.
    zero : :class:`.Expression`
        Initial value to pass in as left argument of `f`.
    collection : :class:`.ArrayExpression` or :class:`.SetExpression`
        Collection expression to reduce.

    Returns
    -------
    :class:`.Expression`
    """
    # Forward through an explicit two-parameter callable instead of passing
    # `f` straight through.
    # NOTE(review): presumably the typecheck-wrapped `f` no longer exposes a
    # two-parameter signature to the method's own check -- confirm before
    # simplifying this to ``collection.fold(f, zero)``.
    def step(accumulated, element):
        return f(accumulated, element)

    return collection.fold(step, zero)
@typecheck(f=func_spec(2, expr_any),
           zero=expr_any,
           a=expr_array())
def array_scan(f: Callable, zero, a) -> ArrayExpression:
    """Produce the array of running values of `f` over `a`, starting from `zero`.

    Examples
    --------

    >>> a = [0, 1, 2]

    >>> hl.eval(hl.array_scan(lambda i, j: i + j, 0, a))
    [0, 0, 1, 3]

    Parameters
    ----------
    f : function ( (:class:`.Expression`, :class:`.Expression`) -> :class:`.Expression`)
        Function which takes the cumulative value and the next element, and
        returns a new value.
    zero : :class:`.Expression`
        Initial value to pass in as left argument of `f`.
    a : :class:`.ArrayExpression`
        Array expression to scan.

    Returns
    -------
    :class:`.ArrayExpression`.
    """
    # Forward through an explicit two-parameter callable instead of passing
    # `f` straight through.
    # NOTE(review): presumably the typecheck-wrapped `f` no longer exposes a
    # two-parameter signature to the method's own check -- confirm before
    # simplifying this to ``a.scan(f, zero)``.
    def step(accumulated, element):
        return f(accumulated, element)

    return a.scan(step, zero)
@typecheck(arrays=expr_array(), fill_missing=bool)
def zip(*arrays, fill_missing: bool = False) -> ArrayExpression:
    """Combine several arrays element-wise into one array of tuples.

    Examples
    --------

    >>> hl.eval(hl.zip([1], [10, 20], [100, 200, 300]))
    [(1, 10, 100)]

    >>> hl.eval(hl.zip([1], [10, 20], [100, 200, 300], fill_missing=True))
    [(1, 10, 100), (None, 20, 200), (None, None, 300)]

    Notes
    -----
    The element type of the resulting array is a :class:`.ttuple` with a field
    for each array.

    Parameters
    ----------
    arrays : variable-length args of :class:`.ArrayExpression`
        Array expressions.
    fill_missing : :obj:`bool`
        If ``False``, return an array with length equal to the shortest length
        of the `arrays`. If ``True``, return an array equal to the longest
        length of the `arrays`, by extending the shorter arrays with missing
        values.

    Returns
    -------
    :class:`.ArrayExpression`
    """
    n_arrays = builtins.len(arrays)

    if fill_missing:
        def zipped(array_lens):
            # Span the longest input; positions past the end of a shorter
            # array become a missing value of that array's element type.
            longest = hl.max(array_lens)
            positions = hl.range(0, longest)

            def padded_row(i):
                return builtins.tuple(
                    hl.cond(i < array_lens[j], arr[i], hl.null(arr.dtype.element_type))
                    for j, arr in builtins.enumerate(arrays))

            return hl.map(padded_row, positions)
    else:
        def zipped(array_lens):
            # Span only the shortest input, so every index is valid in
            # every array.
            shortest = hl.min(array_lens)
            positions = hl.range(0, shortest)

            def row(i):
                return builtins.tuple(arrays[j][i] for j in builtins.range(n_arrays))

            return hl.map(row, positions)

    # The per-array lengths are passed through `bind` and handed to `zipped`
    # as a single array expression.
    lengths = [hl.len(arr) for arr in arrays]
    return bind(zipped, lengths)
@typecheck(a=expr_array())
def zip_with_index(a):
    """Pair every element of an array with its position.

    Examples
    --------

    >>> hl.eval(hl.zip_with_index(['A', 'B', 'C']))
    [(0, 'A'), (1, 'B'), (2, 'C')]

    Parameters
    ----------
    a : :class:`.ArrayExpression`

    Returns
    -------
    :class:`.ArrayExpression`
        Array of (index, element) tuples.
    """
    def pair_with_position(aa):
        # `len` is this module's expression-level function; `range` is
        # presumably the module's expression-level counterpart as well (it is
        # defined outside this view), since `.map` is called on its result.
        positions = range(0, len(aa))
        return positions.map(lambda i: (i, aa[i]))

    return bind(pair_with_position, a)
@typecheck(f=func_spec(1, expr_any),
           collection=expr_oneof(expr_set(), expr_array()))
def map(f: Callable, collection):
    """Apply `f` to every element of a collection.

    Examples
    --------

    >>> a = ['The', 'quick', 'brown', 'fox']

    >>> hl.eval(hl.map(lambda x: hl.len(x), a))
    [3, 5, 5, 3]

    Parameters
    ----------
    f : function ( (arg) -> :class:`.Expression`)
        Function to transform each element of the collection.
    collection : :class:`.ArrayExpression` or :class:`.SetExpression`
        Collection expression.

    Returns
    -------
    :class:`.ArrayExpression` or :class:`.SetExpression`.
        Collection where each element has been transformed by `f`.
    """
    # Thin functional-style wrapper around the collection's ``map`` method.
    transformed = collection.map(f)
    return transformed
@typecheck(x=expr_oneof(expr_set(), expr_array(), expr_dict(), expr_str, expr_tuple(), expr_struct()))
def len(x) -> Int32Expression:
    """Return the number of elements in a collection, or the length of a string.

    Examples
    --------

    >>> a = ['The', 'quick', 'brown', 'fox']
    >>> s = {1, 3, 5, 6, 7, 9}

    >>> hl.eval(hl.len(a))
    4

    >>> hl.eval(hl.len(s))
    6

    >>> hl.eval(hl.len("12345"))
    5

    Parameters
    ----------
    x : :class:`.ArrayExpression` or :class:`.SetExpression` or :class:`.DictExpression` or :class:`.StringExpression` or :class:`.TupleExpression` or :class:`.StructExpression`
        String, collection, tuple or struct expression.

    Returns
    -------
    :class:`.Expression` of type :py:data:`.tint32`
    """
    dtype = x.dtype

    # Tuples and structs: the size is taken in Python with the builtin `len`
    # and wrapped as an int32 expression.
    if isinstance(dtype, (ttuple, tstruct)):
        return hl.int32(builtins.len(x))

    if dtype == tstr:
        return apply_expr(lambda s: StringLength(s), tint32, x)

    # Remaining inputs (array, set, dict) are first converted with this
    # module's `array` and then measured with ArrayLen.
    return apply_expr(lambda arr: ArrayLen(arr), tint32, array(x))
@typecheck(exprs=expr_oneof(expr_numeric, expr_set(expr_numeric), expr_array(expr_numeric)),
           filter_missing=bool)
def max(*exprs, filter_missing: bool = True) -> NumericExpression:
    """Returns the maximum element of a collection or of given numeric expressions.

    Examples
    --------

    Take the maximum value of an array:

    >>> hl.eval(hl.max([1, 3, 5, 6, 7, 9]))
    9

    Take the maximum value of values:

    >>> hl.eval(hl.max(1, 50, 2))
    50

    Notes
    -----
    Like the Python builtin ``max`` function, this function can either take a
    single iterable expression (an array or set of numeric elements), or
    variable-length arguments of numeric expressions.

    Note
    ----
    Missing arguments / array elements are ignored if `filter_missing` is ``True``.
    If `filter_missing` is ``False``, then any missing element causes the result to
    be missing.

    Parameters
    ----------
    exprs : :class:`.ArrayExpression` or :class:`.SetExpression` or varargs of :class:`.NumericExpression`
        Single numeric array or set, or multiple numeric values.
    filter_missing : :obj:`bool`
        Remove missing arguments/elements before computing the maximum.

    Returns
    -------
    :class:`.NumericExpression`

    Raises
    ------
    ValueError
        If no arguments are given.
    TypeError
        If a single argument is not an array or set, or if several arguments
        are given and not all of them are numeric.
    """
    n_args = builtins.len(exprs)
    if n_args < 1:
        raise ValueError("'max' requires at least one argument")

    if n_args > 1:
        if not builtins.all(is_numeric(e.dtype) for e in exprs):
            raise TypeError("'max' expects a single numeric array expression or multiple numeric expressions\n"
                            " Found {} arguments with types '{}'".format(builtins.len(exprs), ', '.join(
                                "'{}'".format(e.dtype) for e in exprs)))
        # Several scalars: pack them into one array and reuse the
        # single-collection path below via a recursive call.
        return max(hl.array(list(exprs)), filter_missing=filter_missing)

    expr = exprs[0]
    if not isinstance(expr.dtype, (tset, tarray)):
        raise TypeError("'max' expects a single numeric array expression or multiple numeric expressions\n"
                        " Found 1 argument of type '{}'".format(expr.dtype))
    return expr._filter_missing_method(filter_missing, 'max', expr.dtype.element_type)
@typecheck(exprs=expr_oneof(expr_numeric, expr_set(expr_numeric), expr_array(expr_numeric)),
           filter_missing=bool)
def min(*exprs, filter_missing: bool = True) -> NumericExpression:
    """Returns the minimum of a collection or of given numeric expressions.

    Examples
    --------

    Take the minimum value of an array:

    >>> hl.eval(hl.min([2, 3, 5, 6, 7, 9]))
    2

    Take the minimum value:

    >>> hl.eval(hl.min(12, 50, 2))
    2

    Notes
    -----
    Like the Python builtin ``min`` function, this function can either take a
    single iterable expression (an array or set of numeric elements), or
    variable-length arguments of numeric expressions.

    Note
    ----
    Missing arguments / array elements are ignored if `filter_missing` is ``True``.
    If `filter_missing` is ``False``, then any missing element causes the result to
    be missing.

    Parameters
    ----------
    exprs : :class:`.ArrayExpression` or :class:`.SetExpression` or varargs of :class:`.NumericExpression`
        Single numeric array or set, or multiple numeric values.
    filter_missing : :obj:`bool`
        Remove missing arguments/elements before computing the minimum.

    Returns
    -------
    :class:`.NumericExpression`

    Raises
    ------
    ValueError
        If no arguments are given.
    TypeError
        If a single argument is not an array or set, or if several arguments
        are given and not all of them are numeric.
    """
    n_args = builtins.len(exprs)
    if n_args < 1:
        raise ValueError("'min' requires at least one argument")

    if n_args > 1:
        if not builtins.all(is_numeric(e.dtype) for e in exprs):
            raise TypeError("'min' expects a single numeric array expression or multiple numeric expressions\n"
                            " Found {} arguments with types '{}'".format(builtins.len(exprs), ', '.join(
                                "'{}'".format(e.dtype) for e in exprs)))
        # Several scalars: pack them into one array and reuse the
        # single-collection path below via a recursive call.
        return min(hl.array(list(exprs)), filter_missing=filter_missing)

    expr = exprs[0]
    if not isinstance(expr.dtype, (tset, tarray)):
        raise TypeError("'min' expects a single numeric array expression or multiple numeric expressions\n"
                        " Found 1 argument of type '{}'".format(expr.dtype))
    return expr._filter_missing_method(filter_missing, 'min', expr.dtype.element_type)
@typecheck(x=expr_oneof(expr_numeric, expr_array(expr_numeric)))
def abs(x):
    """Compute the absolute value of a number, or of each element of an array.

    Examples
    --------

    >>> hl.eval(hl.abs(-5))
    5

    >>> hl.eval(hl.abs([1.0, -2.5, -5.1]))
    [1.0, 2.5, 5.1]

    Parameters
    ----------
    x : :class:`.NumericExpression` or :class:`.ArrayNumericExpression`

    Returns
    -------
    :class:`.NumericExpression` or :class:`.ArrayNumericExpression`.
    """
    # Scalar case: the result keeps the input's numeric type.
    if not isinstance(x.dtype, tarray):
        return x._method('abs', x.dtype)

    # Array case: apply this same function to every element using the
    # module's `map`.
    return map(abs, x)
@typecheck(x=expr_oneof(expr_numeric, expr_array(expr_numeric)))
def sign(x):
    """Compute the sign of a number, or of each element of an array.

    Examples
    --------

    >>> hl.eval(hl.sign(-1.23))
    -1.0

    >>> hl.eval(hl.sign([-4, 0, 5]))
    [-1, 0, 1]

    >>> hl.eval(hl.sign([0.0, 3.14]))
    [0.0, 1.0]

    >>> hl.eval(hl.sign(float('nan'))) # doctest: +SKIP
    nan

    Notes
    -----
    The sign function preserves type and maps ``nan`` to ``nan``.

    Parameters
    ----------
    x : :class:`.NumericExpression` or :class:`.ArrayNumericExpression`

    Returns
    -------
    :class:`.NumericExpression` or :class:`.ArrayNumericExpression`.
    """
    # Scalar case: the result keeps the input's numeric type.
    if not isinstance(x.dtype, tarray):
        return x._method('sign', x.dtype)

    # Array case: apply this same function to every element using the
    # module's `map`.
    return map(sign, x)
@typecheck(collection=expr_oneof(expr_set(expr_numeric), expr_array(expr_numeric)),
           filter_missing=bool)
def mean(collection, filter_missing: bool = True) -> Float64Expression:
    """Returns the mean of all values in the collection.

    Examples
    --------

    >>> a = [1, 3, 5, 6, 7, 9]

    >>> hl.eval(hl.mean(a))
    5.166666666666667

    Note
    ----
    Missing elements are ignored if `filter_missing` is ``True``. If `filter_missing`
    is ``False``, then any missing element causes the result to be missing.

    Parameters
    ----------
    collection : :class:`.ArrayExpression` or :class:`.SetExpression`
        Collection expression with numeric element type.
    filter_missing : :obj:`bool`
        Remove missing elements from the collection before computing the mean.

    Returns
    -------
    :class:`.Expression` of type :py:data:`.tfloat64`
    """
    # The result type is always float64, whatever the element type.
    return collection._filter_missing_method(filter_missing, "mean", tfloat64)
@typecheck(collection=expr_oneof(expr_set(expr_numeric), expr_array(expr_numeric)),
           filter_missing=bool)
def product(collection, filter_missing: bool = True) -> NumericExpression:
    """Multiply together all values in the collection.

    Examples
    --------

    >>> a = [1, 3, 5, 6, 7, 9]

    >>> hl.eval(hl.product(a))
    5670

    Note
    ----
    Missing elements are ignored if `filter_missing` is ``True``. If `filter_missing`
    is ``False``, then any missing element causes the result to be missing.

    Parameters
    ----------
    collection : :class:`.ArrayExpression` or :class:`.SetExpression`
        Collection expression with numeric element type.
    filter_missing : :obj:`bool`
        Remove missing elements from the collection before computing product.

    Returns
    -------
    :class:`.NumericExpression`
    """
    # The result has the same numeric type as the collection's elements.
    element_type = collection.dtype.element_type
    return collection._filter_missing_method(filter_missing, "product", element_type)
@typecheck(collection=expr_oneof(expr_set(expr_numeric), expr_array(expr_numeric)),
           filter_missing=bool)
def sum(collection, filter_missing: bool = True) -> NumericExpression:
    """Returns the sum of values in the collection.

    Examples
    --------

    >>> a = [1, 3, 5, 6, 7, 9]

    >>> hl.eval(hl.sum(a))
    31

    Note
    ----
    Missing elements are ignored if `filter_missing` is ``True``. If `filter_missing`
    is ``False``, then any missing element causes the result to be missing.

    Parameters
    ----------
    collection : :class:`.ArrayExpression` or :class:`.SetExpression`
        Collection expression with numeric element type.
    filter_missing : :obj:`bool`
        Remove missing elements from the collection before computing the sum.

    Returns
    -------
    :class:`.NumericExpression`
    """
    # The result has the same numeric type as the collection's elements.
    element_type = collection.dtype.element_type
    return collection._filter_missing_method(filter_missing, "sum", element_type)
@typecheck(a=expr_array(expr_numeric),
           filter_missing=bool)
def cumulative_sum(a, filter_missing: bool = True) -> ArrayNumericExpression:
    """Returns an array of the cumulative sum of values in the array.

    Examples
    --------

    >>> a = [1, 3, 5, 6, 7, 9]

    >>> hl.eval(hl.cumulative_sum(a))
    [1, 4, 9, 15, 22, 31]

    Note
    ----
    Missing elements are dropped before summing if `filter_missing` is
    ``True``, so the result can be shorter than the input. If `filter_missing`
    is ``False``, missing elements are left in place and take part in the
    running sum.

    Parameters
    ----------
    a : :class:`.ArrayNumericExpression`
        Array expression with numeric element type.
    filter_missing : :obj:`bool`
        Remove missing elements from the array before computing the
        cumulative sum.

    Returns
    -------
    :class:`.ArrayNumericExpression`
    """
    if filter_missing:
        a = a.filter(hl.is_defined)

    # The scan is seeded with the literal 0, and the first entry of its result
    # is sliced off so that the output starts at the first running total.
    # NOTE(review): the seed is an integer literal -- confirm `scan` accepts
    # it when the elements are floating point.
    running_totals = a.scan(lambda accum, elt: accum + elt, 0)
    return running_totals[1:]
@typecheck(kwargs=expr_any)
def struct(**kwargs) -> StructExpression:
    """Build a struct expression from keyword arguments.

    Examples
    --------

    >>> s = hl.struct(a=5, b='Foo')

    >>> hl.eval(s.a)
    5

    Returns
    -------
    :class:`.StructExpression`
        Keyword arguments as a struct.
    """
    # Each keyword name becomes a field name and its value the field's value.
    fields = kwargs
    return StructExpression._from_fields(fields)
def tuple(iterable: Iterable) -> TupleExpression:
    """Construct a tuple expression.

    Examples
    --------

    >>> t = hl.tuple([1, 2, '3'])
    >>> hl.eval(t)
    (1, 2, '3')

    >>> hl.eval(t[2])
    '3'

    Parameters
    ----------
    iterable : :obj:`Iterable` of :class:`.Expression`
        Tuple elements.

    Returns
    -------
    :class:`.TupleExpression`
    """
    # Materialize the iterable as a Python tuple (the builtin, since this
    # function shadows the name), then convert it with `to_expr`.
    return to_expr(builtins.tuple(iterable))
@typecheck(collection=expr_oneof(expr_set(), expr_array()))
def set(collection) -> SetExpression:
    """Convert a collection to a set expression.

    Examples
    --------

    >>> s = hl.set(['Bob', 'Charlie', 'Alice', 'Bob', 'Bob'])
    >>> s.show()
    {'Alice', 'Bob', 'Charlie'}

    Returns
    -------
    :class:`.SetExpression`
        Set of all unique elements.
    """
    dtype = collection.dtype

    # Arrays are converted with a ToSet node; the element type is preserved.
    if not isinstance(dtype, tset):
        return apply_expr(lambda coll: ToSet(coll), tset(dtype.element_type), collection)

    # Already a set: returned unchanged.
    return collection
@typecheck(t=hail_type)
def empty_set(t: Union[HailType, str]) -> SetExpression:
    """Create a set expression with element type `t` and no elements.

    Examples
    --------

    >>> hl.eval(hl.empty_set(hl.tstr))
    set()

    Parameters
    ----------
    t : :obj:`str` or :class:`.HailType`
        Type of the set elements.

    Returns
    -------
    :class:`.SetExpression`
    """
    # Built by converting an empty array of the same element type.
    no_elements = empty_array(t)
    return hl.set(no_elements)
@typecheck(collection=expr_oneof(expr_set(), expr_array(), expr_dict()))
def array(collection) -> ArrayExpression:
    """Convert a collection to an array expression.

    Examples
    --------

    >>> s = {'Bob', 'Charlie', 'Alice'}

    >>> hl.eval(hl.array(s))
    ['Charlie', 'Alice', 'Bob']

    Parameters
    ----------
    collection : :class:`.ArrayExpression` or :class:`.SetExpression` or :class:`.DictExpression`

    Returns
    -------
    :class:`.ArrayExpression`
        Arrays are returned unchanged; a dict becomes an array of
        (key, value) tuples.
    """
    dtype = collection.dtype

    if isinstance(dtype, tarray):
        return collection

    if isinstance(dtype, tset):
        return apply_expr(lambda coll: ToArray(coll), tarray(dtype.element_type), collection)

    # The typecheck admits only set, array and dict, so a dict is all that
    # remains here.
    assert isinstance(dtype, tdict)
    pair_type = ttuple(dtype.key_type, dtype.value_type)
    return _func('dictToArray', tarray(pair_type), collection)
@typecheck(t=hail_type)
def empty_array(t: Union[HailType, str]) -> ArrayExpression:
    """Create an array expression with element type `t` and no elements.

    Examples
    --------

    >>> hl.eval(hl.empty_array(hl.tint32))
    []

    Parameters
    ----------
    t : :obj:`str` or :class:`.HailType`
        Type of the array elements.

    Returns
    -------
    :class:`.ArrayExpression`
    """
    array_type = hl.tarray(t)
    # A MakeArray node with an empty element list, typed as array<t>.
    return construct_expr(MakeArray([], array_type), array_type)
@typecheck(key_type=hail_type, value_type=hail_type)
def empty_dict(key_type: Union[HailType, str], value_type: Union[HailType, str]) -> DictExpression:
    """Create a dictionary expression with no entries, with key type
    `key_type` and value type `value_type`.

    Examples
    --------

    >>> hl.eval(hl.empty_dict(hl.tstr, hl.tint32))
    {}

    Parameters
    ----------
    key_type : :obj:`str` or :class:`.HailType`
        Type of the keys.
    value_type : :obj:`str` or :class:`.HailType`
        Type of the values.

    Returns
    -------
    :class:`.DictExpression`
    """
    # Built by converting an empty array of (key, value) tuples to a dict.
    entry_type = hl.ttuple(key_type, value_type)
    no_entries = hl.empty_array(entry_type)
    return hl.dict(no_entries)
@typecheck(collection=expr_oneof(expr_set(expr_set()), expr_array(expr_array())))
def flatten(collection):
    """Collapse one level of nesting by concatenating the sub-collections.

    Examples
    --------

    >>> a = [[1, 2], [2, 3]]

    >>> hl.eval(hl.flatten(a))
    [1, 2, 2, 3]

    Parameters
    ----------
    collection : :class:`.ArrayExpression` or :class:`.SetExpression`
        Collection with element type :class:`.tarray` or :class:`.tset`.

    Returns
    -------
    collection : :class:`.ArrayExpression` or :class:`.SetExpression`
    """
    # Flat-mapping with the identity function concatenates the inner
    # collections.
    def identity(inner):
        return inner

    return collection.flatmap(identity)
@typecheck(collection=expr_oneof(expr_array(), expr_set()),
           delimiter=expr_str)
def delimit(collection, delimiter=',') -> StringExpression:
    """Join the elements of `collection` into one string separated by `delimiter`.

    Examples
    --------

    >>> a = ['Bob', 'Charlie', 'Alice', 'Bob', 'Bob']

    >>> hl.eval(hl.delimit(a))
    'Bob,Charlie,Alice,Bob,Bob'

    Notes
    -----
    If the element type of `collection` is not :py:data:`.tstr`, then the
    :func:`str` function will be called on each element before joining with
    the delimiter.

    Parameters
    ----------
    collection : :class:`.ArrayExpression` or :class:`.SetExpression`
        Collection.
    delimiter : str or :class:`.StringExpression`
        Field delimiter.

    Returns
    -------
    :class:`.StringExpression`
        Joined string expression.
    """
    elements_are_strings = collection.dtype.element_type == tstr
    if not elements_are_strings:
        # `map` is this module's expression-level function; `str` is
        # presumably the module's expression-level counterpart too (it is
        # defined outside this view).
        collection = map(str, collection)

    joined = collection._method("mkString", tstr, delimiter)
    return joined
@typecheck(collection=expr_array(),
           key=nullable(func_spec(1, expr_any)),
           reverse=expr_bool)
def sorted(collection,
           key: Optional[Callable]=None,
           reverse=False) -> ArrayExpression:
    """Returns a sorted array.

    Examples
    --------

    >>> a = ['Charlie', 'Alice', 'Bob']

    >>> hl.eval(hl.sorted(a))
    ['Alice', 'Bob', 'Charlie']

    >>> hl.eval(hl.sorted(a, reverse=True))
    ['Charlie', 'Bob', 'Alice']

    >>> hl.eval(hl.sorted(a, key=lambda x: hl.len(x)))
    ['Bob', 'Alice', 'Charlie']

    Notes
    -----
    The ordered types are :py:data:`.tstr` and numeric types.

    Parameters
    ----------
    collection : :class:`.ArrayExpression`
        Array to sort.
    key : function ( (arg) -> :class:`.Expression`), optional
        Function to evaluate for each element to compute sort key.
    reverse : :class:`.BooleanExpression`
        Sort in descending order.

    Returns
    -------
    :class:`.ArrayExpression`
        Sorted array.
    """
    # `reverse` is a boolean expression after typechecking, so it is negated
    # with the expression operator `~` rather than Python's `not`.
    ascending = ~reverse

    if key is None:
        return collection._method("sort", collection.dtype, ascending)

    # Keyed sort: pair each element with its key, sort the (key, element)
    # tuples on the key, then project the original elements back out.
    with_key = collection.map(lambda elt: hl.tuple([key(elt), elt]))
    ir = ArraySort(with_key._ir, ascending._ir, on_key=True)
    indices, aggregations = unify_all(with_key, ascending)
    sorted_pairs = construct_expr(ir, with_key.dtype, indices, aggregations)
    return sorted_pairs.map(lambda elt: elt[1])
@typecheck(array=expr_array(expr_numeric), unique=bool)
def argmin(array, unique: bool = False) -> Int32Expression:
    """Find the index of the smallest value in the array.

    Examples
    --------

    >>> hl.eval(hl.argmin([0.2, 0.3, 0.6]))
    0

    >>> hl.eval(hl.argmin([0.4, 0.2, 0.2]))
    1

    >>> hl.eval(hl.argmin([0.4, 0.2, 0.2], unique=True))
    None

    Notes
    -----
    Returns the index of the minimum value in the array.

    When two or more elements are tied for minimum, the `unique` parameter
    determines the result. If `unique` is ``False``, the first index is
    returned. If `unique` is ``True``, the result is missing.

    If the array is empty, then the result is missing.

    Note
    ----
    Missing elements are ignored.

    Parameters
    ----------
    array : :class:`.ArrayNumericExpression`
    unique : bool

    Returns
    -------
    :class:`.Expression` of type :py:data:`.tint32`
    """
    # `unique` is a plain Python bool, so the method is chosen when the
    # expression is built rather than at evaluation time.
    method_name = "uniqueMinIndex" if unique else "argmin"
    return array._method(method_name, tint32)
@typecheck(array=expr_array(expr_numeric), unique=bool)
def argmax(array, unique: bool = False) -> Int32Expression:
    """Find the index of the largest value in the array.

    Examples
    --------

    >>> hl.eval(hl.argmax([0.2, 0.2, 0.6]))
    2

    >>> hl.eval(hl.argmax([0.4, 0.4, 0.2]))
    0

    >>> hl.eval(hl.argmax([0.4, 0.4, 0.2], unique=True))
    None

    Notes
    -----
    Returns the index of the maximum value in the array.

    When two or more elements are tied for maximum, the `unique` parameter
    determines the result. If `unique` is ``False``, the first index is
    returned. If `unique` is ``True``, the result is missing.

    If the array is empty, then the result is missing.

    Note
    ----
    Missing elements are ignored.

    Parameters
    ----------
    array : :class:`.ArrayNumericExpression`
    unique : bool

    Returns
    -------
    :class:`.Expression` of type :py:data:`.tint32`
    """
    # `unique` is a plain Python bool, so the method is chosen when the
    # expression is built rather than at evaluation time.
    method_name = "uniqueMaxIndex" if unique else "argmax"
    return array._method(method_name, tint32)
@typecheck(x=expr_oneof(expr_numeric, expr_bool, expr_str))
def float64(x) -> Float64Expression:
    """Cast an expression to 64-bit floating point.

    Examples
    --------

    >>> hl.eval(hl.float64('1.1'))
    1.1

    >>> hl.eval(hl.float64(1))
    1.0

    >>> hl.eval(hl.float64(True))
    1.0

    Parameters
    ----------
    x : :class:`.NumericExpression` or :class:`.BooleanExpression` or :class:`.StringExpression`

    Returns
    -------
    :class:`.NumericExpression` of type :py:data:`.tfloat64`
    """
    # An input that is already float64 is returned unchanged.
    return x if x.dtype == tfloat64 else x._method("toFloat64", tfloat64)
@typecheck(x=expr_oneof(expr_numeric, expr_bool, expr_str))
def float32(x) -> Float32Expression:
    """Cast an expression to 32-bit floating point.

    Examples
    --------

    >>> hl.eval(hl.float32('1.1'))
    1.1

    >>> hl.eval(hl.float32(1))
    1.0

    >>> hl.eval(hl.float32(True))
    1.0

    Parameters
    ----------
    x : :class:`.NumericExpression` or :class:`.BooleanExpression` or :class:`.StringExpression`

    Returns
    -------
    :class:`.NumericExpression` of type :py:data:`.tfloat32`
    """
    # An input that is already float32 is returned unchanged.
    return x if x.dtype == tfloat32 else x._method("toFloat32", tfloat32)
@typecheck(x=expr_oneof(expr_numeric, expr_bool, expr_str))
def int64(x) -> Int64Expression:
    """Cast an expression to a 64-bit integer.

    Examples
    --------

    >>> hl.eval(hl.int64('1'))
    1

    >>> hl.eval(hl.int64(1.5))
    1

    >>> hl.eval(hl.int64(True))
    1

    Parameters
    ----------
    x : :class:`.NumericExpression` or :class:`.BooleanExpression` or :class:`.StringExpression`

    Returns
    -------
    :class:`.NumericExpression` of type :py:data:`.tint64`
    """
    # An input that is already int64 is returned unchanged.
    return x if x.dtype == tint64 else x._method("toInt64", tint64)
@typecheck(x=expr_oneof(expr_numeric, expr_bool, expr_str))
def int32(x) -> Int32Expression:
    """Cast an expression to a 32-bit integer.

    Examples
    --------

    >>> hl.eval(hl.int32('1'))
    1

    >>> hl.eval(hl.int32(1.5))
    1

    >>> hl.eval(hl.int32(True))
    1

    Parameters
    ----------
    x : :class:`.NumericExpression` or :class:`.BooleanExpression` or :class:`.StringExpression`

    Returns
    -------
    :class:`.NumericExpression` of type :py:data:`.tint32`
    """
    # An input that is already int32 is returned unchanged.
    return x if x.dtype == tint32 else x._method("toInt32", tint32)
@typecheck(x=expr_oneof(expr_numeric, expr_bool, expr_str))
def int(x) -> Int32Expression:
    """Cast an expression to a 32-bit integer.

    Examples
    --------

    >>> hl.eval(hl.int('1'))
    1

    >>> hl.eval(hl.int(1.5))
    1

    >>> hl.eval(hl.int(True))
    1

    Note
    ----
    Alias for :func:`.int32`.

    Parameters
    ----------
    x : :class:`.NumericExpression` or :class:`.BooleanExpression` or :class:`.StringExpression`

    Returns
    -------
    :class:`.NumericExpression` of type :py:data:`.tint32`
    """
    # Pure alias: all conversion logic lives in `int32`.
    as_int32 = int32(x)
    return as_int32
@typecheck(x=expr_oneof(expr_numeric, expr_bool, expr_str))
def float(x) -> Float64Expression:
    """Cast an expression to 64-bit floating point.

    Examples
    --------

    >>> hl.eval(hl.float('1.1'))
    1.1

    >>> hl.eval(hl.float(1))
    1.0

    >>> hl.eval(hl.float(True))
    1.0

    Note
    ----
    Alias for :func:`.float64`.

    Parameters
    ----------
    x : :class:`.NumericExpression` or :class:`.BooleanExpression` or :class:`.StringExpression`

    Returns
    -------
    :class:`.NumericExpression` of type :py:data:`.tfloat64`
    """
    # Pure alias: all conversion logic lives in `float64`.
    as_float64 = float64(x)
    return as_float64
@typecheck(x=expr_oneof(expr_numeric, expr_bool, expr_str))
def bool(x) -> BooleanExpression:
    """Cast an expression to a Boolean.

    Examples
    --------

    >>> hl.eval(hl.bool('TRUE'))
    True

    >>> hl.eval(hl.bool(1.5))
    True

    Notes
    -----
    Numeric expressions return ``True`` if they are non-zero, and ``False``
    if they are zero.

    Acceptable string values are: ``'True'``, ``'true'``, ``'TRUE'``,
    ``'False'``, ``'false'``, and ``'FALSE'``.

    Parameters
    ----------
    x : :class:`.NumericExpression` or :class:`.BooleanExpression` or :class:`.StringExpression`

    Returns
    -------
    :class:`.BooleanExpression`
    """
    dtype = x.dtype

    # Already Boolean: returned unchanged.
    if dtype == tbool:
        return x

    # Numeric: true exactly when the value differs from zero.
    if is_numeric(dtype):
        return x != 0

    # The typecheck admits only numeric, Boolean and string inputs, so what
    # remains is a string.
    return x._method("toBoolean", tbool)
@typecheck(contig=expr_str,
           position=expr_int32,
           before=expr_int32,
           after=expr_int32,
           reference_genome=reference_genome_type)
def get_sequence(contig, position, before=0, after=0, reference_genome='default') -> StringExpression:
    """Return the reference sequence at a given locus.

    Examples
    --------

    Return the reference allele for ``'GRCh37'`` at the locus ``'1:45323'``:

    >>> hl.eval(hl.get_sequence('1', 45323, reference_genome='GRCh37')) # doctest: +SKIP
    "T"

    Notes
    -----
    This function requires `reference_genome` has an attached
    reference sequence. Use :meth:`.ReferenceGenome.add_sequence` to
    load and attach a reference sequence to a reference genome.

    Returns ``None`` if `contig` and `position` are not valid coordinates in
    `reference_genome`.

    Parameters
    ----------
    contig : :class:`.Expression` of type :py:data:`.tstr`
        Locus contig.
    position : :class:`.Expression` of type :py:data:`.tint32`
        Locus position.
    before : :class:`.Expression` of type :py:data:`.tint32`, optional
        Number of bases to include before the locus of interest. Truncates at
        contig boundary.
    after : :class:`.Expression` of type :py:data:`.tint32`, optional
        Number of bases to include after the locus of interest. Truncates at
        contig boundary.
    reference_genome : :obj:`str` or :class:`.ReferenceGenome`
        Reference genome to use. Must have a reference sequence available.

    Returns
    -------
    :class:`.StringExpression`

    Raises
    ------
    TypeError
        If `reference_genome` has no reference sequence loaded.
    """
    # Fail when the expression is built rather than at evaluation time if no
    # sequence is attached to the reference genome.
    if not reference_genome.has_sequence():
        raise TypeError("Reference genome '{}' does not have a sequence loaded. Use 'add_sequence' to load the sequence from a FASTA file.".format(reference_genome.name))

    # The reference genome's name is embedded in the function name.
    return _func("getReferenceSequence({})".format(reference_genome.name), tstr,
                 contig, position, before, after)
@typecheck(contig=expr_str,
           reference_genome=reference_genome_type)
def is_valid_contig(contig, reference_genome='default') -> BooleanExpression:
    """Test whether `contig` is a valid contig name in `reference_genome`.

    Examples
    --------

    >>> hl.eval(hl.is_valid_contig('1', 'GRCh37'))
    True

    >>> hl.eval(hl.is_valid_contig('chr1', 'GRCh37'))
    False

    Parameters
    ----------
    contig : :class:`.Expression` of type :py:data:`.tstr`
    reference_genome : :obj:`str` or :class:`.ReferenceGenome`

    Returns
    -------
    :class:`.BooleanExpression`
    """
    # The reference genome's name is embedded in the function name.
    function_name = "isValidContig({})".format(reference_genome.name)
    return _func(function_name, tbool, contig)
@typecheck(contig=expr_str,
           position=expr_int32,
           reference_genome=reference_genome_type)
def is_valid_locus(contig, position, reference_genome='default') -> BooleanExpression:
    """Test whether `contig` and `position` form a valid site in `reference_genome`.

    Examples
    --------

    >>> hl.eval(hl.is_valid_locus('1', 324254, 'GRCh37'))
    True

    >>> hl.eval(hl.is_valid_locus('chr1', 324254, 'GRCh37'))
    False

    Parameters
    ----------
    contig : :class:`.Expression` of type :py:data:`.tstr`
    position : :class:`.Expression` of type :py:data:`.tint32`
    reference_genome : :obj:`str` or :class:`.ReferenceGenome`

    Returns
    -------
    :class:`.BooleanExpression`
    """
    # The reference genome's name is embedded in the function name.
    function_name = "isValidLocus({})".format(reference_genome.name)
    return _func(function_name, tbool, contig, position)
@typecheck(locus=expr_locus(), is_female=expr_bool, father=expr_call, mother=expr_call, child=expr_call)
def mendel_error_code(locus, is_female, father, mother, child):
    r"""Compute a Mendelian violation code for genotypes.

    >>> father = hl.call(0, 0)
    >>> mother = hl.call(1, 1)
    >>> child1 = hl.call(0, 1)  # consistent
    >>> child2 = hl.call(0, 0)  # Mendel error
    >>> locus = hl.locus('2', 2000000)

    >>> hl.eval(hl.mendel_error_code(locus, True, father, mother, child1))
    None

    >>> hl.eval(hl.mendel_error_code(locus, True, father, mother, child2))
    7

    Note
    ----
    Ignores call phasing, and assumes diploid and biallelic. Haploid calls for
    hemiploid samples on sex chromosomes also are acceptable input.

    Notes
    -----
    In the table below, the copy state of a locus with respect to a trio is
    defined as follows, where PAR is the `pseudoautosomal region
    <https://en.wikipedia.org/wiki/Pseudoautosomal_region>`__ (PAR) of X and Y
    defined by the reference genome and the autosome is defined by
    :meth:`.LocusExpression.in_autosome`:

    - Auto -- in autosome or in PAR, or in non-PAR of X and female child
    - HemiX -- in non-PAR of X and male child
    - HemiY -- in non-PAR of Y and male child

    `Any` refers to the set \{ HomRef, Het, HomVar, NoCall \} and `~`
    denotes complement in this set.

    +------+---------+---------+--------+------------+---------------+
    | Code | Dad     | Mom     | Kid    | Copy State | Implicated    |
    +======+=========+=========+========+============+===============+
    |    1 | HomVar  | HomVar  | Het    | Auto       | Dad, Mom, Kid |
    +------+---------+---------+--------+------------+---------------+
    |    2 | HomRef  | HomRef  | Het    | Auto       | Dad, Mom, Kid |
    +------+---------+---------+--------+------------+---------------+
    |    3 | HomRef  | ~HomRef | HomVar | Auto       | Dad, Kid      |
    +------+---------+---------+--------+------------+---------------+
    |    4 | ~HomRef | HomRef  | HomVar | Auto       | Mom, Kid      |
    +------+---------+---------+--------+------------+---------------+
    |    5 | HomRef  | HomRef  | HomVar | Auto       | Kid           |
    +------+---------+---------+--------+------------+---------------+
    |    6 | HomVar  | ~HomVar | HomRef | Auto       | Dad, Kid      |
    +------+---------+---------+--------+------------+---------------+
    |    7 | ~HomVar | HomVar  | HomRef | Auto       | Mom, Kid      |
    +------+---------+---------+--------+------------+---------------+
    |    8 | HomVar  | HomVar  | HomRef | Auto       | Kid           |
    +------+---------+---------+--------+------------+---------------+
    |    9 | Any     | HomVar  | HomRef | HemiX      | Mom, Kid      |
    +------+---------+---------+--------+------------+---------------+
    |   10 | Any     | HomRef  | HomVar | HemiX      | Mom, Kid      |
    +------+---------+---------+--------+------------+---------------+
    |   11 | HomVar  | Any     | HomRef | HemiY      | Dad, Kid      |
    +------+---------+---------+--------+------------+---------------+
    |   12 | HomRef  | Any     | HomVar | HemiY      | Dad, Kid      |
    +------+---------+---------+--------+------------+---------------+

    Parameters
    ----------
    locus : :class:`.LocusExpression`
    is_female : :class:`.BooleanExpression`
    father : :class:`.CallExpression`
    mother : :class:`.CallExpression`
    child : :class:`.CallExpression`

    Returns
    -------
    :class:`.Int32Expression`
        One of the codes in the table above, or missing if no rule matches.
    """
    # NOTE: the docstring is a raw string because it contains the literal
    # sequences "\{" and "\}", which are invalid escapes in a normal string.

    father_n = father.n_alt_alleles()
    mother_n = mother.n_alt_alleles()
    child_n = child.n_alt_alleles()

    # Order matters: the rules constraining both parents (1, 2, 5, 8) are
    # tested before the single-parent rules (3, 4, 6, 7), so e.g. code 5 wins
    # over codes 3 and 4 when both parents have zero alternate alleles.
    auto_cond = (hl.case(missing_false=True)
                 .when((father_n == 2) & (mother_n == 2) & (child_n == 1), 1)
                 .when((father_n == 0) & (mother_n == 0) & (child_n == 1), 2)
                 .when((father_n == 0) & (mother_n == 0) & (child_n == 2), 5)
                 .when((father_n == 2) & (mother_n == 2) & (child_n == 0), 8)
                 .when((father_n == 0) & (child_n == 2), 3)
                 .when((mother_n == 0) & (child_n == 2), 4)
                 .when((father_n == 2) & (child_n == 0), 6)
                 .when((mother_n == 2) & (child_n == 0), 7)
                 .or_missing()
                 )

    # The hemizygous rules test "> 0" rather than "== 2" on the child (and on
    # the father for HemiY), so any non-zero alternate-allele count matches.
    hemi_x_cond = (hl.case(missing_false=True)
                   .when((mother_n == 2) & (child_n == 0), 9)
                   .when((mother_n == 0) & (child_n > 0), 10)
                   .or_missing()
                   )

    hemi_y_cond = (hl.case(missing_false=True)
                   .when((father_n > 0) & (child_n == 0), 11)
                   .when((father_n == 0) & (child_n > 0), 12)
                   .or_missing()
                   )

    # `is_female` alone selects the autosomal rules in the first branch, so
    # the hemizygous branches are only reachable for male children.
    return (hl.case()
            .when(locus.in_autosome_or_par() | is_female, auto_cond)
            .when(locus.in_x_nonpar() & (~is_female), hemi_x_cond)
            .when(locus.in_y_nonpar() & (~is_female), hemi_y_cond)
            .or_missing()
            )
@typecheck(locus=expr_locus(), alleles=expr_array(expr_str))
def min_rep(locus, alleles):
    """Reduce a (locus, alleles) polymorphism to its minimal representation.

    Examples
    --------

    >>> hl.eval(hl.min_rep(hl.locus('1', 100000), ['TAA', 'TA']))
    Struct(locus=Locus(contig=1, position=100000, reference_genome=GRCh37), alleles=['TA', 'T'])

    >>> hl.eval(hl.min_rep(hl.locus('1', 100000), ['AATAA', 'AACAA']))
    Struct(locus=Locus(contig=1, position=100002, reference_genome=GRCh37), alleles=['T', 'C'])

    Notes
    -----
    Computing the minimal representation can cause the locus to shift right
    (the position can increase).

    Parameters
    ----------
    locus : :class:`.LocusExpression`
    alleles : :class:`.ArrayExpression` of type :py:data:`.tstr`

    Returns
    -------
    :class:`.StructExpression`
        A :class:`.tstruct` expression with two fields, `locus`
        (:class:`.LocusExpression`) and `alleles`
        (:class:`.ArrayExpression` of type :py:data:`.tstr`).
    """
    # The result struct reuses the dtypes of the two inputs unchanged.
    return _func('min_rep',
                 tstruct(locus=locus.dtype, alleles=alleles.dtype),
                 locus,
                 alleles)
@typecheck(x=oneof(expr_locus(), expr_interval(expr_locus())),
           dest_reference_genome=reference_genome_type,
           min_match=builtins.float)
def liftover(x, dest_reference_genome, min_match=0.95):
    """Lift over coordinates to a different reference genome.

    Examples
    --------

    Lift over the locus coordinates from reference genome ``'GRCh37'`` to
    ``'GRCh38'``:

    >>> hl.eval(hl.liftover(hl.locus('1', 1034245, 'GRCh37'), 'GRCh38'))  # doctest: +SKIP
    Locus(contig='chr1', position=1098865, reference_genome='GRCh38')

    Lift over the locus interval coordinates from reference genome ``'GRCh37'``
    to ``'GRCh38'``:

    >>> hl.eval(hl.liftover(hl.locus_interval('20', 60001, 82456, True, True, 'GRCh37'), 'GRCh38'))  # doctest: +SKIP
    Interval(Locus(contig='chr20', position=79360, reference_genome='GRCh38'),
             Locus(contig='chr20', position=101815, reference_genome='GRCh38'),
             True,
             True)

    Notes
    -----
    This function requires the reference genome of `x` has a chain file loaded
    for `dest_reference_genome`. Use :meth:`.ReferenceGenome.add_liftover` to
    load and attach a chain file to a reference genome.

    Returns ``None`` if `x` could not be converted.

    Warning
    -------
    Before using the result of :func:`.liftover` as a new row key or column
    key, be sure to filter out missing values.

    Parameters
    ----------
    x : :class:`.Expression` of type :py:data:`.tlocus` or :py:data:`.tinterval` of :py:data:`.tlocus`
        Locus or locus interval to lift over.
    dest_reference_genome : :obj:`str` or :class:`.ReferenceGenome`
        Reference genome to convert to.
    min_match : :obj:`float`
        Minimum ratio of bases that must remap. Must lie in ``[0, 1]``.

    Returns
    -------
    :class:`.Expression`
        A locus or locus interval converted to `dest_reference_genome`.

    Raises
    ------
    TypeError
        If `min_match` is outside ``[0, 1]``, or if the reference genome of
        `x` has no liftover to `dest_reference_genome`.
    """
    in_unit_range = 0.0 <= min_match <= 1.0
    if not in_unit_range:
        raise TypeError("'liftover' requires 'min_match' is in the range [0, 1]. Got {}".format(min_match))

    # Loci and locus intervals are served by different functions and have
    # different result types; the source genome comes from the input's dtype.
    if isinstance(x.dtype, tlocus):
        source_rg = x.dtype.reference_genome
        name_template = "liftoverLocus({})({})"
        result_type = tlocus(dest_reference_genome)
    else:
        source_rg = x.dtype.point_type.reference_genome
        name_template = "liftoverLocusInterval({})({})"
        result_type = tinterval(tlocus(dest_reference_genome))

    dest_name = dest_reference_genome.name
    method_name = name_template.format(source_rg.name, dest_name)

    if not source_rg.has_liftover(dest_name):
        raise TypeError(
            "Reference genome '{}' does not have liftover to '{}'.\n"
            "Use 'add_liftover' to load a liftover chain file.".format(source_rg.name, dest_name))

    return _func(method_name, result_type, x, to_expr(min_match, tfloat))
@typecheck(f=func_spec(1, expr_float64),
           min=expr_float64,
           max=expr_float64)
def uniroot(f: Callable, min, max):
    """Find a root of the function `f` within the interval `[min, max]`.

    Examples
    --------

    >>> hl.eval(hl.uniroot(lambda x: x - 1, -5, 5))
    1.0

    Notes
    -----
    `f(min)` and `f(max)` must not have the same sign.

    If no root can be found, the result of this call will be `NA` (missing).

    Parameters
    ----------
    f : function ( (arg) -> :class:`.Float64Expression`)
        Must return a :class:`.Float64Expression`.
    min : :class:`.Float64Expression`
        Lower end of the search interval.
    max : :class:`.Float64Expression`
        Upper end of the search interval.

    Returns
    -------
    :class:`.Float64Expression`
        The root of the function `f`.
    """
    # Apply `f` to a fresh float64 variable to obtain its body as an
    # expression; the variable's uid is what the Uniroot node binds.
    arg_name = Env.get_uid()
    placeholder = construct_variable(arg_name, hl.tfloat64)
    body = to_expr(f(placeholder))

    indices, aggregations = unify_all(body, min, max)
    root_ir = Uniroot(arg_name, body._ir, min._ir, max._ir)
    return construct_expr(root_ir, body._type, indices, aggregations)
@typecheck(x=expr_float64, y=expr_float64, tolerance=expr_float64, absolute=expr_bool, nan_same=expr_bool)
def approx_equal(x, y, tolerance=1e-6, absolute=False, nan_same=False):
    """Test whether two numbers are approximately equal.

    Examples
    --------

    >>> hl.eval(hl.approx_equal(0.25, 0.2500001))
    True

    >>> hl.eval(hl.approx_equal(0.25, 0.251, tolerance=1e-3, absolute=True))
    False

    Parameters
    ----------
    x : :class:`.NumericExpression`
        First number to compare.
    y : :class:`.NumericExpression`
        Second number to compare.
    tolerance : :class:`.NumericExpression`
        Tolerance of the comparison (default ``1e-6``).
    absolute : :class:`.BooleanExpression`
        If True, compute ``abs(x - y) <= tolerance``. Otherwise, compute
        ``abs(x - y) <= max(tolerance * max(abs(x), abs(y)), 2 ** -1022)``.
    nan_same : :class:`.BooleanExpression`
        If True, then ``NaN == NaN`` will evaluate to True. Otherwise,
        it will return False.

    Returns
    -------
    :class:`.BooleanExpression`
    """
    # All five arguments are forwarded, in order, to the applied function.
    operands = (x, y, tolerance, absolute, nan_same)
    return _func("approxEqual", hl.tbool, *operands)