NumPy#

NumPy is a package for numerical computations

  • supports vectors, matrices and multidimensional arrays

  • fast numerical processing by means of vectorized functions

  • built around the ndarray object type

numpy vs list#

  • A NumPy array has a fixed size, while a list can grow dynamically

  • All the elements of a NumPy array must have the same type

  • NumPy arrays support element-wise math operations (just like vectors)

Motivating example (Imgur)

import numpy as np

NumPy arrays creation#

  • Converting Python structures

  • Generation via built-in functions

Converting from Python structures#

lst = [1, 2, 3, 4, 5]
arr = np.array(lst)
print(f"list = {lst}, np.array = {arr}")
print(type(lst), type(arr))
list = [1, 2, 3, 4, 5], np.array = [1 2 3 4 5]
<class 'list'> <class 'numpy.ndarray'>
tpl = (1, 2, 3, 4, 5)
arr = np.array(tpl)
print(f"tuple = {tpl}, np.array = {arr}")
print(type(tpl), type(arr))
tuple = (1, 2, 3, 4, 5), np.array = [1 2 3 4 5]
<class 'tuple'> <class 'numpy.ndarray'>

The underlying data type can be specified by the argument dtype:

arr.dtype
dtype('int64')
np.array([1, 2, 3, 4, 5], dtype=np.float32)
array([1., 2., 3., 4., 5.], dtype=float32)

NumPy arrays generation#

  • arange — like range

  • linspace — uniform partition of a segment

  • logspace — log scale partition

  • zeros — creates an array of zeroes

  • ones — creates an array of ones

  • full — creates an array filled with a given value

np.arange(0, 5, 0.5)
array([0. , 0.5, 1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5])
np.linspace(0, 5, 11, endpoint=False)
array([0.        , 0.45454545, 0.90909091, 1.36363636, 1.81818182,
       2.27272727, 2.72727273, 3.18181818, 3.63636364, 4.09090909,
       4.54545455])
np.logspace(0, 9, 11, base=2)
array([  1.        ,   1.86606598,   3.48220225,   6.49801917,
        12.12573253,  22.627417  ,  42.22425314,  78.79324245,
       147.03338944, 274.37400641, 512.        ])
np.zeros((3, 4))
array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])
np.ones((2, 2))
array([[1., 1.],
       [1., 1.]])
np.full((2, 2), 42)
array([[42, 42],
       [42, 42]])
# creates a diagonal matrix
np.diag([1, 2, 3])
array([[1, 0, 0],
       [0, 2, 0],
       [0, 0, 3]])
# creates an identity matrix
np.eye(3)
array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])
array = np.ones((2, 3))
print('Array shape = {}, number of dimensions = {}'.format(array.shape, array.ndim))
Array shape = (2, 3), number of dimensions = 2
array
array([[1., 1., 1.],
       [1., 1., 1.]])

The reshape method gives an array a new shape without changing its data.

a = np.arange(0, 6)
print(a, a.shape)
[0 1 2 3 4 5] (6,)
array = a.reshape((2, 3))
print(array, array.shape)
[[0 1 2]
 [3 4 5]] (2, 3)

Use ravel to flatten a multidimensional array into a vector

# -1 may be passed for one dimension; its size is inferred from the array length
array = np.arange(0, 6, 0.5).reshape((3, -1))
print(array, array.shape)
array = np.ravel(array)
print(array, array.shape)
[[0.  0.5 1.  1.5]
 [2.  2.5 3.  3.5]
 [4.  4.5 5.  5.5]] (3, 4)
[0.  0.5 1.  1.5 2.  2.5 3.  3.5 4.  4.5 5.  5.5] (12,)

Indexing#

print(array[0])
print(array[-1])  
print(array[1:-1])
print(array[1:-1:2])
print(array[::-1])
0.0
5.5
[0.5 1.  1.5 2.  2.5 3.  3.5 4.  4.5 5. ]
[0.5 1.5 2.5 3.5 4.5]
[5.5 5.  4.5 4.  3.5 3.  2.5 2.  1.5 1.  0.5 0. ]

Arrays (or lists) of ints or booleans can also be used as indices

array[[0, 2, 4, 6, 8, 10]]
array([0., 1., 2., 3., 4., 5.])
array[[True, False, True, False, True, False, True, False, True, False, True, False]]
array([0., 1., 2., 3., 4., 5.])

Boolean indexing is commonly used for filtering

x = np.arange(10)
x
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
x[(x % 2 == 0) & (x > 5)]
array([6, 8])

The filtering above did not modify x, but boolean indexing can also be used to modify the selected elements in place

print(x)
x[x > 3] *= 2
print(x)
[0 1 2 3 4 5 6 7 8 9]
[ 0  1  2  3  8 10 12 14 16 18]

Random#

np.random.seed(101)
a = np.random.rand(5)
b = np.random.rand(5)
print(a)
print(b)
[0.51639863 0.57066759 0.02847423 0.17152166 0.68527698]
[0.83389686 0.30696622 0.89361308 0.72154386 0.18993895]

Arithmetic with arrays as vectors#

a + b 
array([1.35029549, 0.87763381, 0.92208731, 0.89306552, 0.87521594])
a - b
array([-0.31749823,  0.26370137, -0.86513885, -0.55002221,  0.49533803])
a * b
array([0.4306232 , 0.17517567, 0.02544494, 0.1237604 , 0.13016079])
a / b
array([0.61925959, 1.85905663, 0.03186416, 0.2377148 , 3.60788015])

Inner product: \((a, b) = \sum\limits_{k=1}^n a_k b_k\)

a.dot(b)
0.8851650000094948
a @ b
0.8851650000094948
np.dot(a, b)
0.8851650000094948

sum, mean, std#

np.sum(a), a.sum()
(1.9723390789710982, 1.9723390789710982)
np.mean(b), b.mean()
(0.5891917955929626, 0.5891917955929626)
np.std(a), np.std(b)
(0.2506550421114526, 0.2860503677763882)
lst = list(range(2*10**6))
arr = np.arange(2*10**6)
print(sum(lst), arr.sum())
1999999000000 1999999000000
%%timeit
sum(lst)
16.6 ms ± 131 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
%%timeit
np.sum(arr)
670 µs ± 10.8 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)

Matrix operations#

A = np.random.normal(size=(2, 2))
A
array([[-2.01816824,  0.74012206],
       [ 0.52881349, -0.58900053]])
# transpose
A.T
array([[-2.01816824,  0.52881349],
       [ 0.74012206, -0.58900053]])
np.transpose(A)
array([[-2.01816824,  0.52881349],
       [ 0.74012206, -0.58900053]])
# calc determinant
np.linalg.det(A)
0.7973156409556252
# inverse matrix
B = np.linalg.inv(A)
A @ B
array([[ 1.00000000e+00, -4.51542844e-17],
       [-1.01303503e-16,  1.00000000e+00]])
np.sum(A)
-1.3382332261785077
# sum of elements in every column
np.sum(A, axis=0)
array([-1.48935475,  0.15112152])
# sum of elements in every row
np.sum(A, axis=1)
array([-1.27804619, -0.06018704])

Matrix indexing#

A = np.arange(15).reshape((3, 5))
A
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])
B = np.random.normal(loc=5, scale=10, size=(3, 4))
B
array([[  6.88695309,  -2.58872056,  -4.33237216,  14.55056509],
       [  6.90794322,  24.78757324,  31.0596728 ,  11.83508886],
       [  8.02665449,  21.93722925, -12.06085931,  -6.59119416]])
np.sort(B, axis=None)
array([-12.06085931,  -6.59119416,  -4.33237216,  -2.58872056,
         6.88695309,   6.90794322,   8.02665449,  11.83508886,
        14.55056509,  21.93722925,  24.78757324,  31.0596728 ])
# access to element
A[1, 2]
7
# second row
A[1, :]
array([5, 6, 7, 8, 9])
# third column
A[:, 2]
array([ 2,  7, 12])
# slice
A[0, 1:4]
array([1, 2, 3])
# every second element of the last row
A[-1, ::2]
array([10, 12, 14])
# average over the whole matrix
np.mean(A)
7.0
# average over each column
np.mean(A, axis=0)
array([5., 6., 7., 8., 9.])
# average over each row
np.mean(A, axis=1)
array([ 2.,  7., 12.])
B = np.arange(20, 30).reshape((5, 2))
B
array([[20, 21],
       [22, 23],
       [24, 25],
       [26, 27],
       [28, 29]])
# matrix product
A.dot(B)
array([[ 260,  270],
       [ 860,  895],
       [1460, 1520]])
A @ B
array([[ 260,  270],
       [ 860,  895],
       [1460, 1520]])

Concatenation#

np.concatenate, np.hstack, np.vstack, np.dstack

x = np.arange(6).reshape(3, 2)
y = np.arange(100, 112).reshape(3, 4)
x, y
(array([[0, 1],
        [2, 3],
        [4, 5]]),
 array([[100, 101, 102, 103],
        [104, 105, 106, 107],
        [108, 109, 110, 111]]))
np.hstack((x, y))
array([[  0,   1, 100, 101, 102, 103],
       [  2,   3, 104, 105, 106, 107],
       [  4,   5, 108, 109, 110, 111]])
np.vstack((x.T, y.T))
array([[  0,   2,   4],
       [  1,   3,   5],
       [100, 104, 108],
       [101, 105, 109],
       [102, 106, 110],
       [103, 107, 111]])
np.concatenate((x, y), axis=1)
array([[  0,   1, 100, 101, 102, 103],
       [  2,   3, 104, 105, 106, 107],
       [  4,   5, 108, 109, 110, 111]])
np.concatenate((x.T, y.T), axis=0)
array([[  0,   2,   4],
       [  1,   3,   5],
       [100, 104, 108],
       [101, 105, 109],
       [102, 106, 110],
       [103, 107, 111]])