NumPy#
NumPy is a package for numerical computations
supports vectors, matrices and multidimensional arrays
fast numerical processing by means of vectorized functions
based on object type ndarray
numpy vs list#
A NumPy array has a fixed size, while lists can grow dynamically
All the elements of a NumPy array must have the same type
Arithmetic operations on NumPy arrays are applied element-wise (just like with vectors)
Motivating example
import numpy as np
NumPy arrays creation#
Converting Python structures
Generation via built-in functions
Converting from Python structures#
lst = [1, 2, 3, 4, 5]
arr = np.array(lst)
print(f"list = {lst}, np.array = {arr}")
print(type(lst), type(arr))
list = [1, 2, 3, 4, 5], np.array = [1 2 3 4 5]
<class 'list'> <class 'numpy.ndarray'>
tpl = (1, 2, 3, 4, 5)
arr = np.array(tpl)
print(f"tuple = {tpl}, np.array = {arr}")
print(type(tpl), type(arr))
tuple = (1, 2, 3, 4, 5), np.array = [1 2 3 4 5]
<class 'tuple'> <class 'numpy.ndarray'>
The underlying data type is stored in the attribute dtype and can be specified via the argument dtype:
arr.dtype
dtype('int64')
np.array([1, 2, 3, 4, 5], dtype=np.float32)
array([1., 2., 3., 4., 5.], dtype=float32)
NumPy arrays generation#
arange — like range, but returns an array and supports non-integer steps
linspace — uniform partition of a segment
logspace — log scale partition
zeros — creates an array of zeroes
ones — creates an array of ones
full — creates an array filled with a given value
np.arange(0, 5, 0.5)
array([0. , 0.5, 1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5])
np.linspace(0, 5, 11, endpoint=False)
array([0. , 0.45454545, 0.90909091, 1.36363636, 1.81818182,
2.27272727, 2.72727273, 3.18181818, 3.63636364, 4.09090909,
4.54545455])
np.logspace(0, 9, 11, base=2)
array([ 1. , 1.86606598, 3.48220225, 6.49801917,
12.12573253, 22.627417 , 42.22425314, 78.79324245,
147.03338944, 274.37400641, 512. ])
np.zeros((3, 4))
array([[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.]])
np.ones((2, 2))
array([[1., 1.],
[1., 1.]])
np.full((2, 2), 42)
array([[42, 42],
[42, 42]])
# creates a diagonal matrix
np.diag([1, 2, 3])
array([[1, 0, 0],
[0, 2, 0],
[0, 0, 3]])
# creates an identity matrix
np.eye(3)
array([[1., 0., 0.],
[0., 1., 0.],
[0., 0., 1.]])
array = np.ones((2, 3))
print('Array shape = {}, number of dimensions = {}'.format(array.shape, array.ndim))
Array shape = (2, 3), number of dimensions = 2
array
array([[1., 1., 1.],
[1., 1., 1.]])
The reshape method gives an array a new shape without changing its data.
a = np.arange(0, 6)
print(a, a.shape)
[0 1 2 3 4 5] (6,)
array = a.reshape((2, 3))
print(array, array.shape)
[[0 1 2]
[3 4 5]] (2, 3)
Use ravel to flatten a multidimensional array into a vector
# -1 can be passed for one dimension; NumPy infers it from the array size
array = np.arange(0, 6, 0.5).reshape((3, -1))
print(array, array.shape)
array = np.ravel(array)
print(array, array.shape)
[[0. 0.5 1. 1.5]
[2. 2.5 3. 3.5]
[4. 4.5 5. 5.5]] (3, 4)
[0. 0.5 1. 1.5 2. 2.5 3. 3.5 4. 4.5 5. 5.5] (12,)
Indexing#
print(array[0])
print(array[-1])
print(array[1:-1])
print(array[1:-1:2])
print(array[::-1])
0.0
5.5
[0.5 1. 1.5 2. 2.5 3. 3.5 4. 4.5 5. ]
[0.5 1.5 2.5 3.5 4.5]
[5.5 5. 4.5 4. 3.5 3. 2.5 2. 1.5 1. 0.5 0. ]
Arrays (or lists) of integers or booleans can be used as indices
array[[0, 2, 4, 6, 8, 10]]
array([0., 1., 2., 3., 4., 5.])
array[[True, False, True, False, True, False, True, False, True, False, True, False]]
array([0., 1., 2., 3., 4., 5.])
Boolean indexing is commonly used for filtering
x = np.arange(10)
x
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
x[(x % 2 == 0) & (x > 5)]
array([6, 8])
x was not actually changed by the filtering above, but modifying elements in place via boolean indexing is also possible:
print(x)
x[x > 3] *= 2
print(x)
[0 1 2 3 4 5 6 7 8 9]
[ 0 1 2 3 8 10 12 14 16 18]
Random#
np.random.seed(101)
a = np.random.rand(5)
b = np.random.rand(5)
print(a)
print(b)
[0.51639863 0.57066759 0.02847423 0.17152166 0.68527698]
[0.83389686 0.30696622 0.89361308 0.72154386 0.18993895]
Arithmetic with arrays as vectors#
a + b
array([1.35029549, 0.87763381, 0.92208731, 0.89306552, 0.87521594])
a - b
array([-0.31749823, 0.26370137, -0.86513885, -0.55002221, 0.49533803])
a * b
array([0.4306232 , 0.17517567, 0.02544494, 0.1237604 , 0.13016079])
a / b
array([0.61925959, 1.85905663, 0.03186416, 0.2377148 , 3.60788015])
Inner product: \((a, b) = \sum\limits_{k=1}^n a_k b_k\)
a.dot(b)
0.8851650000094948
a @ b
0.8851650000094948
np.dot(a, b)
0.8851650000094948
sum, mean, std#
np.sum(a), a.sum()
(1.9723390789710982, 1.9723390789710982)
np.mean(b), b.mean()
(0.5891917955929626, 0.5891917955929626)
np.std(a), np.std(b)
(0.2506550421114526, 0.2860503677763882)
lst = list(range(2*10**6))
arr = np.arange(2*10**6)
print(sum(lst), arr.sum())
1999999000000 1999999000000
%%timeit
sum(lst)
16.6 ms ± 131 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
%%timeit
np.sum(arr)
670 µs ± 10.8 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
Matrix operations#
A = np.random.normal(size=(2, 2))
A
array([[-2.01816824, 0.74012206],
[ 0.52881349, -0.58900053]])
# transpose
A.T
array([[-2.01816824, 0.52881349],
[ 0.74012206, -0.58900053]])
np.transpose(A)
array([[-2.01816824, 0.52881349],
[ 0.74012206, -0.58900053]])
# calc determinant
np.linalg.det(A)
0.7973156409556252
# inverse matrix
B = np.linalg.inv(A)
A @ B
array([[ 1.00000000e+00, -4.51542844e-17],
[-1.01303503e-16, 1.00000000e+00]])
np.sum(A)
-1.3382332261785077
# sum of elements in every column
np.sum(A, axis=0)
array([-1.48935475, 0.15112152])
# sum of elements in every row
np.sum(A, axis=1)
array([-1.27804619, -0.06018704])
Matrix indexing#
A = np.arange(15).reshape((3, 5))
A
array([[ 0, 1, 2, 3, 4],
[ 5, 6, 7, 8, 9],
[10, 11, 12, 13, 14]])
B = np.random.normal(loc=5, scale=10, size=(3, 4))
B
array([[ 6.88695309, -2.58872056, -4.33237216, 14.55056509],
[ 6.90794322, 24.78757324, 31.0596728 , 11.83508886],
[ 8.02665449, 21.93722925, -12.06085931, -6.59119416]])
np.sort(B, axis=None)
array([-12.06085931, -6.59119416, -4.33237216, -2.58872056,
6.88695309, 6.90794322, 8.02665449, 11.83508886,
14.55056509, 21.93722925, 24.78757324, 31.0596728 ])
# access to element
A[1, 2]
7
# second row
A[1, :]
array([5, 6, 7, 8, 9])
# third column
A[:, 2]
array([ 2, 7, 12])
# slice
A[0, 1:4]
array([1, 2, 3])
# every second element of the last row
A[-1, ::2]
array([10, 12, 14])
# average over the whole matrix
np.mean(A)
7.0
# average over each column
np.mean(A, axis=0)
array([5., 6., 7., 8., 9.])
# average over each row
np.mean(A, axis=1)
array([ 2., 7., 12.])
B = np.arange(20, 30).reshape((5, 2))
B
array([[20, 21],
[22, 23],
[24, 25],
[26, 27],
[28, 29]])
# matrix product
A.dot(B)
array([[ 260, 270],
[ 860, 895],
[1460, 1520]])
A @ B
array([[ 260, 270],
[ 860, 895],
[1460, 1520]])
Concatenation#
np.concatenate, np.hstack, np.vstack, np.dstack
x = np.arange(6).reshape(3, 2)
y = np.arange(100, 112).reshape(3, 4)
x, y
(array([[0, 1],
[2, 3],
[4, 5]]),
array([[100, 101, 102, 103],
[104, 105, 106, 107],
[108, 109, 110, 111]]))
np.hstack((x, y))
array([[ 0, 1, 100, 101, 102, 103],
[ 2, 3, 104, 105, 106, 107],
[ 4, 5, 108, 109, 110, 111]])
np.vstack((x.T, y.T))
array([[ 0, 2, 4],
[ 1, 3, 5],
[100, 104, 108],
[101, 105, 109],
[102, 106, 110],
[103, 107, 111]])
np.concatenate((x, y), axis=1)
array([[ 0, 1, 100, 101, 102, 103],
[ 2, 3, 104, 105, 106, 107],
[ 4, 5, 108, 109, 110, 111]])
np.concatenate((x.T, y.T), axis=0)
array([[ 0, 2, 4],
[ 1, 3, 5],
[100, 104, 108],
[101, 105, 109],
[102, 106, 110],
[103, 107, 111]])