NumPy

NumPy#

NumPy is a package for numerical computations

supports vectors, matrices and multidimensional arrays
fast numerical processing by means of vectorized functions
based on object type ndarray

`numpy` vs `list`#

A NumPy array has a fixed size, while lists can grow dynamically
All the elements of a NumPy array must have the same type
Math operations with NumPy arrays are allowed (just like with vectors)

Motivating example (Imgur)

import numpy as np

NumPy arrays creation#

Converting Python structures
Generation via built-in functions

Converting from Python structures#

lst = [1, 2, 3, 4, 5]
arr = np.array(lst)
print(f"list = {lst}, np.array = {arr}")
print(type(lst), type(arr))

list = [1, 2, 3, 4, 5], np.array = [1 2 3 4 5]
<class 'list'> <class 'numpy.ndarray'>

tpl = (1, 2, 3, 4, 5)
arr = np.array(tpl)
print(f"tuple = {tpl}, np.array = {arr}")
print(type(tpl), type(arr))

tuple = (1, 2, 3, 4, 5), np.array = [1 2 3 4 5]
<class 'tuple'> <class 'numpy.ndarray'>

The underlying data type can be specified by the argument dtype:

arr.dtype

dtype('int64')

np.array([1, 2, 3, 4, 5], dtype=np.float32)

array([1., 2., 3., 4., 5.], dtype=float32)

Generating NumPy arrays#

arange — like range
linspace — uniform partition of a segment
logspace — log scale partition
zeros — creates an array of zeroes
ones — creates an array of ones
full — creates an array filled with a given value

np.arange(0, 5, 0.5)

array([0. , 0.5, 1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5])

np.linspace(0, 5, 11, endpoint=False)

array([0.        , 0.45454545, 0.90909091, 1.36363636, 1.81818182,
       2.27272727, 2.72727273, 3.18181818, 3.63636364, 4.09090909,
       4.54545455])

np.logspace(0, 9, 11, base=2)

array([  1.        ,   1.86606598,   3.48220225,   6.49801917,
        12.12573253,  22.627417  ,  42.22425314,  78.79324245,
       147.03338944, 274.37400641, 512.        ])

np.zeros((3, 4))

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

np.ones((2, 2))

array([[1., 1.],
       [1., 1.]])

np.full((2, 2), 42)

array([[42, 42],
       [42, 42]])

# creates a diagonal matrix
np.diag([1, 2, 3])

array([[1, 0, 0],
       [0, 2, 0],
       [0, 0, 3]])

# creates an identity matrix
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

array = np.ones((2, 3))
print('Array shape = {}, number of dimensions = {}'.format(array.shape, array.ndim))

Array shape = (2, 3), number of dimensions = 2

array

array([[1., 1., 1.],
       [1., 1., 1.]])

The reshape method gives an array a new shape without changing its data.

a = np.arange(0, 6)
print(a, a.shape)

[0 1 2 3 4 5] (6,)

array = a.reshape((2, 3))
print(array, array.shape)

[[0 1 2]
 [3 4 5]] (2, 3)

Use ravel to flatten a multidimensional array into a vector

# can use -1 instead of one dimension
array = np.arange(0, 6, 0.5).reshape((3, -1))
print(array, array.shape)
array = np.ravel(array)
print(array, array.shape)

[[0.  0.5 1.  1.5]
 [2.  2.5 3.  3.5]
 [4.  4.5 5.  5.5]] (3, 4)
[0.  0.5 1.  1.5 2.  2.5 3.  3.5 4.  4.5 5.  5.5] (12,)

Indexing#

print(array[0])
print(array[-1])  
print(array[1:-1])
print(array[1:-1:2])
print(array[::-1])

0.0
5.5
[0.5 1.  1.5 2.  2.5 3.  3.5 4.  4.5 5. ]
[0.5 1.5 2.5 3.5 4.5]
[5.5 5.  4.5 4.  3.5 3.  2.5 2.  1.5 1.  0.5 0. ]

Arrays of ints or booleans can be used as indices

array[[0, 2, 4, 6, 8, 10]]

array([0., 1., 2., 3., 4., 5.])

array[[True, False, True, False, True, False, True, False, True, False, True, False]]

array([0., 1., 2., 3., 4., 5.])

Boolean indexing is commonly used for filtering

x = np.arange(10)
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

x[(x % 2 == 0) & (x > 5)]

array([6, 8])

The filtering above did not modify x; however, boolean indexing can also be used to change the selected elements in place

print(x)
x[x > 3] *= 2
print(x)

[0 1 2 3 4 5 6 7 8 9]
[ 0  1  2  3  8 10 12 14 16 18]

Random#

np.random.seed(101)
a = np.random.rand(5)
b = np.random.rand(5)
print(a)
print(b)

[0.51639863 0.57066759 0.02847423 0.17152166 0.68527698]
[0.83389686 0.30696622 0.89361308 0.72154386 0.18993895]

Arithmetic with arrays as vectors#

a + b 

array([1.35029549, 0.87763381, 0.92208731, 0.89306552, 0.87521594])

a - b

array([-0.31749823,  0.26370137, -0.86513885, -0.55002221,  0.49533803])

a * b

array([0.4306232 , 0.17517567, 0.02544494, 0.1237604 , 0.13016079])

a / b

array([0.61925959, 1.85905663, 0.03186416, 0.2377148 , 3.60788015])

Inner product: \((a, b) = \sum\limits_{k=1}^n a_k b_k\)

a.dot(b)

0.8851650000094948

a @ b

0.8851650000094948

np.dot(a, b)

0.8851650000094948

`sum`, `mean`, `std`#

np.sum(a), a.sum()

(1.9723390789710982, 1.9723390789710982)

np.mean(b), b.mean()

(0.5891917955929626, 0.5891917955929626)

np.std(a), np.std(b)

(0.2506550421114526, 0.2860503677763882)

lst = list(range(2*10**6))
arr = np.arange(2*10**6)
print(sum(lst), arr.sum())

1999999000000 1999999000000

%%timeit
sum(lst)

16.6 ms ± 131 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)

%%timeit
np.sum(arr)

670 µs ± 10.8 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)

Matrix operations#

A = np.random.normal(size=(2, 2))
A

array([[-2.01816824,  0.74012206],
       [ 0.52881349, -0.58900053]])

# transpose
A.T

array([[-2.01816824,  0.52881349],
       [ 0.74012206, -0.58900053]])

np.transpose(A)

array([[-2.01816824,  0.52881349],
       [ 0.74012206, -0.58900053]])

# calc determinant
np.linalg.det(A)

0.7973156409556252

# inverse matrix
B = np.linalg.inv(A)
A @ B

array([[ 1.00000000e+00, -4.51542844e-17],
       [-1.01303503e-16,  1.00000000e+00]])

np.sum(A)

-1.3382332261785077

# sum of elements in every column
np.sum(A, axis=0)

array([-1.48935475,  0.15112152])

# sum of elements in every row
np.sum(A, axis=1)

array([-1.27804619, -0.06018704])

Matrix indexing#

A = np.arange(15).reshape((3, 5))
A

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

B = np.random.normal(loc=5, scale=10, size=(3, 4))
B

array([[  6.88695309,  -2.58872056,  -4.33237216,  14.55056509],
       [  6.90794322,  24.78757324,  31.0596728 ,  11.83508886],
       [  8.02665449,  21.93722925, -12.06085931,  -6.59119416]])

np.sort(B, axis=None)

array([-12.06085931,  -6.59119416,  -4.33237216,  -2.58872056,
         6.88695309,   6.90794322,   8.02665449,  11.83508886,
        14.55056509,  21.93722925,  24.78757324,  31.0596728 ])

# access to element
A[1, 2]

# second row
A[1, :]

array([5, 6, 7, 8, 9])

# third column
A[:, 2]

array([ 2,  7, 12])

# slice
A[0, 1:4]

array([1, 2, 3])

# every second element of the last row
A[-1, ::2]

array([10, 12, 14])

# average over the whole matrix
np.mean(A)

7.0

# average over each column
np.mean(A, axis=0)

array([5., 6., 7., 8., 9.])

# average over each row
np.mean(A, axis=1)

array([ 2.,  7., 12.])

B = np.arange(20, 30).reshape((5, 2))
B

array([[20, 21],
       [22, 23],
       [24, 25],
       [26, 27],
       [28, 29]])

# matrix product
A.dot(B)

array([[ 260,  270],
       [ 860,  895],
       [1460, 1520]])

A @ B

array([[ 260,  270],
       [ 860,  895],
       [1460, 1520]])

Concatenation#

np.concatenate, np.hstack, np.vstack, np.dstack

x = np.arange(6).reshape(3, 2)
y = np.arange(100, 112).reshape(3, 4)

x, y

(array([[0, 1],
        [2, 3],
        [4, 5]]),
 array([[100, 101, 102, 103],
        [104, 105, 106, 107],
        [108, 109, 110, 111]]))

np.hstack((x, y))

array([[  0,   1, 100, 101, 102, 103],
       [  2,   3, 104, 105, 106, 107],
       [  4,   5, 108, 109, 110, 111]])

np.vstack((x.T, y.T))

array([[  0,   2,   4],
       [  1,   3,   5],
       [100, 104, 108],
       [101, 105, 109],
       [102, 106, 110],
       [103, 107, 111]])

np.concatenate((x, y), axis=1)

array([[  0,   1, 100, 101, 102, 103],
       [  2,   3, 104, 105, 106, 107],
       [  4,   5, 108, 109, 110, 111]])

np.concatenate((x.T, y.T), axis=0)

array([[  0,   2,   4],
       [  1,   3,   5],
       [100, 104, 108],
       [101, 105, 109],
       [102, 106, 110],
       [103, 107, 111]])