Abracadabra

Python data analysis-Learning note-Ch04

Numpy基础:数组和矢量计算

1
%matplotlib inline
1
2
3
4
from __future__ import division
from numpy.random import randn
import numpy as np
np.set_printoptions(precision=4, suppress=True)
1
2
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

NumPy ndarray: 一种多维数组对象

1
data = randn(2, 3)
1
2
3
data
data * 10
data + data
array([[ 0.1584,  0.299 , -0.2555],
       [ 0.3277, -0.6934,  1.3191]])






array([[  1.5842,   2.9896,  -2.5545],
       [  3.2767,  -6.9342,  13.1913]])






array([[ 0.3168,  0.5979, -0.5109],
       [ 0.6553, -1.3868,  2.6383]])
1
2
data.shape
data.dtype
(2, 3)






dtype('float64')

创建ndarray

1
2
3
data1 = [6, 7.5, 8, 0, 1]
arr1 = np.array(data1)
arr1
array([ 6. ,  7.5,  8. ,  0. ,  1. ])
1
2
3
4
5
data2 = [[1, 2, 3, 4], [5, 6, 7, 8]]
arr2 = np.array(data2)
arr2
arr2.ndim
arr2.shape
array([[1, 2, 3, 4],
       [5, 6, 7, 8]])






2






(2, 4)

除非显式说明,np.array会尝试为新建的数组选择一个合适的数据类型

1
2
arr1.dtype
arr2.dtype
dtype('float64')






dtype('int32')
1
2
3
np.zeros(10)
np.zeros((3, 6))
np.empty((2, 3, 2))
array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])






array([[ 0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.]])






array([[[ 0.,  0.],
        [ 0.,  0.],
        [ 0.,  0.]],

       [[ 0.,  0.],
        [ 0.,  0.],
        [ 0.,  0.]]])
1
np.arange(15)
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

ones_like、zeros_like、empty_like这三个函数接受一个数组作为参数,分别创建与该数组形状和dtype相同的全1数组、全0数组,以及只分配内存但未初始化的数组

ndarray的数据类型

1
2
3
4
arr1 = np.array([1, 2, 3], dtype=np.float64)
arr2 = np.array([1, 2, 3], dtype=np.int32)
arr1.dtype
arr2.dtype
dtype('float64')






dtype('int32')

当需要控制数据在内存和磁盘中的存储方式时(尤其是对大数据集),那就得了解如何控制存储类型

1
2
3
4
arr = np.array([1, 2, 3, 4, 5])
arr.dtype
float_arr = arr.astype(np.float64)
float_arr.dtype
dtype('int32')






dtype('float64')
1
2
3
arr = np.array([3.7, -1.2, -2.6, 0.5, 12.9, 10.1])
arr
arr.astype(np.int32)
array([  3.7,  -1.2,  -2.6,   0.5,  12.9,  10.1])






array([ 3, -1, -2,  0, 12, 10])
1
2
numeric_strings = np.array(['1.25', '-9.6', '42'], dtype=np.string_)
numeric_strings.astype(float)
array([  1.25,  -9.6 ,  42.  ])
1
2
3
int_array = np.arange(10)
calibers = np.array([.22, .270, .357, .380, .44, .50], dtype=np.float64)
int_array.astype(calibers.dtype)
array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.])
1
2
empty_uint32 = np.empty(8, dtype='u4')
empty_uint32
array([1, 2, 3, 4, 5, 6, 7, 8], dtype=uint32)

调用astype会创建原数组的一份拷贝

数组和标量之间的运算

1
2
3
4
arr = np.array([[1., 2., 3.], [4., 5., 6.]])
arr
arr * arr
arr - arr
array([[ 1.,  2.,  3.],
       [ 4.,  5.,  6.]])






array([[  1.,   4.,   9.],
       [ 16.,  25.,  36.]])






array([[ 0.,  0.,  0.],
       [ 0.,  0.,  0.]])
1
2
1 / arr
arr ** 0.5
array([[ 1.    ,  0.5   ,  0.3333],
       [ 0.25  ,  0.2   ,  0.1667]])






array([[ 1.    ,  1.4142,  1.7321],
       [ 2.    ,  2.2361,  2.4495]])

基本的索引和切片

1
2
3
4
5
6
arr = np.arange(10)
arr
arr[5]
arr[5:8]
arr[5:8] = 12
arr
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])






5






array([5, 6, 7])






array([ 0,  1,  2,  3,  4, 12, 12, 12,  8,  9])

切片是原数组的视图,对切片的修改会直接反映到原数组上
如果想要得到一份复制的版本,需要显式地调用copy()方法,例如arr[5:8].copy()

1
2
3
4
5
arr_slice = arr[5:8]
arr_slice[1] = 12345
arr
arr_slice[:] = 64
arr
array([    0,     1,     2,     3,     4,    12, 12345,    12,     8,     9])






array([ 0,  1,  2,  3,  4, 64, 64, 64,  8,  9])
1
2
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
arr2d[2]
array([7, 8, 9])

注意下面两种索引方式是等价的

1
2
arr2d[0][2]
arr2d[0, 2]
3






3
1
2
arr3d = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])
arr3d
array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])
1
arr3d[0]
array([[1, 2, 3],
       [4, 5, 6]])
1
2
3
4
5
old_values = arr3d[0].copy()
arr3d[0] = 42
arr3d
arr3d[0] = old_values
arr3d
array([[[42, 42, 42],
        [42, 42, 42]],

       [[ 7,  8,  9],
        [10, 11, 12]]])






array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])
1
arr3d[1, 0]
array([7, 8, 9])

切片索引

1
arr[1:6]
array([ 1,  2,  3,  4, 64])
1
2
arr2d
arr2d[:2]
array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])






array([[1, 2, 3],
       [4, 5, 6]])
1
arr2d[:2, 1:]
array([[2, 3],
       [5, 6]])
1
2
arr2d[1, :2]
arr2d[2, :1]
array([4, 5])






array([7])
1
arr2d[:, :1]
array([[1],
       [4],
       [7]])
1
arr2d[:2, 1:] = 0

布尔型索引

1
2
3
4
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
data = randn(7, 4)
names
data
array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'], 
      dtype='<U4')






array([[-2.9033,  1.4721,  0.9512,  1.7727],
       [ 2.2303, -1.0259,  1.0664,  0.534 ],
       [-0.9725,  0.2226, -0.1538, -0.4994],
       [-1.4289,  0.1665, -1.2874, -1.0817],
       [ 1.3581, -1.0734, -0.1387,  0.1673],
       [ 1.2816,  1.8883,  0.5699, -0.5843],
       [-0.0464, -0.9633,  0.2855, -0.6473]])
1
names == 'Bob'
array([ True, False, False,  True, False, False, False], dtype=bool)
1
data[names == 'Bob']
array([[-2.9033,  1.4721,  0.9512,  1.7727],
       [-1.4289,  0.1665, -1.2874, -1.0817]])
1
2
data[names == 'Bob', 2:]
data[names == 'Bob', 3]
array([[ 0.9512,  1.7727],
       [-1.2874, -1.0817]])






array([ 1.7727, -1.0817])
1
2
names != 'Bob'
data[~(names == 'Bob')]
array([False,  True,  True, False,  True,  True,  True], dtype=bool)






array([[ 2.2303, -1.0259,  1.0664,  0.534 ],
       [-0.9725,  0.2226, -0.1538, -0.4994],
       [ 1.3581, -1.0734, -0.1387,  0.1673],
       [ 1.2816,  1.8883,  0.5699, -0.5843],
       [-0.0464, -0.9633,  0.2855, -0.6473]])
1
2
3
mask = (names == 'Bob') | (names == 'Will')
mask
data[mask]
array([ True, False,  True,  True,  True, False, False], dtype=bool)






array([[-2.9033,  1.4721,  0.9512,  1.7727],
       [-0.9725,  0.2226, -0.1538, -0.4994],
       [-1.4289,  0.1665, -1.2874, -1.0817],
       [ 1.3581, -1.0734, -0.1387,  0.1673]])

Python关键字and和or在布尔型数组中无效,应使用&(与)和|(或)

1
2
data[data < 0] = 0
data
array([[ 0.    ,  1.4721,  0.9512,  1.7727],
       [ 2.2303,  0.    ,  1.0664,  0.534 ],
       [ 0.    ,  0.2226,  0.    ,  0.    ],
       [ 0.    ,  0.1665,  0.    ,  0.    ],
       [ 1.3581,  0.    ,  0.    ,  0.1673],
       [ 1.2816,  1.8883,  0.5699,  0.    ],
       [ 0.    ,  0.    ,  0.2855,  0.    ]])
1
2
data[names != 'Joe'] = 7
data
array([[ 7.    ,  7.    ,  7.    ,  7.    ],
       [ 2.2303,  0.    ,  1.0664,  0.534 ],
       [ 7.    ,  7.    ,  7.    ,  7.    ],
       [ 7.    ,  7.    ,  7.    ,  7.    ],
       [ 7.    ,  7.    ,  7.    ,  7.    ],
       [ 1.2816,  1.8883,  0.5699,  0.    ],
       [ 0.    ,  0.    ,  0.2855,  0.    ]])

花式索引

花式索引与切片不同,它总是将数据复制到新的数组中

1
2
3
4
arr = np.empty((8, 4))
for i in range(8):
arr[i] = i
arr
array([[ 0.,  0.,  0.,  0.],
       [ 1.,  1.,  1.,  1.],
       [ 2.,  2.,  2.,  2.],
       [ 3.,  3.,  3.,  3.],
       [ 4.,  4.,  4.,  4.],
       [ 5.,  5.,  5.,  5.],
       [ 6.,  6.,  6.,  6.],
       [ 7.,  7.,  7.,  7.]])
1
arr[[4, 3, 0, 6]]
array([[ 4.,  4.,  4.,  4.],
       [ 3.,  3.,  3.,  3.],
       [ 0.,  0.,  0.,  0.],
       [ 6.,  6.,  6.,  6.]])
1
arr[[-3, -5, -7]]
array([[ 5.,  5.,  5.,  5.],
       [ 3.,  3.,  3.,  3.],
       [ 1.,  1.,  1.,  1.]])
1
2
3
4
# more on reshape in Chapter 12
arr = np.arange(32).reshape((8, 4))
arr
arr[[1, 5, 7, 2], [0, 3, 1, 2]]
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])






array([ 4, 23, 29, 10])

根据以上可知,传入两个索引数组时,它们会按位置一一配对,即选出(1, 0)、(5, 3)、(7, 1)和(2, 2)处的元素,结果是一维数组

如果想选出由这些行和列构成的矩形区域,可以先选行、再对列进行选取,注意以下这种方式

1
arr[[1, 5, 7, 2]][:, [0, 3, 1, 2]]
array([[ 4,  7,  5,  6],
       [20, 23, 21, 22],
       [28, 31, 29, 30],
       [ 8, 11,  9, 10]])

np.ix_方法将两个一维数组转换为一个矩形区域的索引选择器

1
arr[np.ix_([1, 5, 7, 2], [0, 3, 1, 2])]
array([[ 4,  7,  5,  6],
       [20, 23, 21, 22],
       [28, 31, 29, 30],
       [ 8, 11,  9, 10]])

数组转置和轴对换

1
2
3
arr = np.arange(15).reshape((3, 5))
arr
arr.T
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])






array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])
1
2
arr = np.random.randn(6, 3)
np.dot(arr.T, arr)
array([[ 3.6804,  0.0133,  1.0388],
       [ 0.0133,  1.6074,  0.1836],
       [ 1.0388,  0.1836,  3.5281]])
1
2
3
arr = np.arange(16).reshape((2, 2, 4))
arr
arr.transpose((1, 0, 2))
array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7]],

       [[ 8,  9, 10, 11],
        [12, 13, 14, 15]]])






array([[[ 0,  1,  2,  3],
        [ 8,  9, 10, 11]],

       [[ 4,  5,  6,  7],
        [12, 13, 14, 15]]])

Referenced from here.

In short: transposing an array means that NumPy just needs to permute the stride and shape information for each axis:

>>> arr.strides
(64, 32, 8)

>>> arr.transpose(1, 0, 2).strides
(32, 64, 8)

Notice that the strides for the first and second axes were swapped here. This means that no data needs to be copied; NumPy can simply change how it looks at the memory to construct the array.


What are strides?

The values in a 3D array arr are stored in a contiguous block of memory like this:

[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15]

In the case of arr, each integer takes up 8 bytes of memory (i.e. we’re using the int64 dtype).

A stride tells NumPy how many bytes to skip in order to move to the next value along an axis. For example, to get the next value in a row in arr (axis 2), we just need to move 8 bytes (1 number).

The strides for arr.transpose(1, 0, 2) are (32, 64, 8). To move along the first axis, instead of 64 bytes (8 numbers) NumPy will now only skip 32 bytes (4 numbers) each time:

[[[0    ...]
  [...  ...]]

 [[4    ...]
  [...  ...]]]

Similarly, NumPy will now skip 64 bytes (8 numbers) in order to move along axis 1:

[[[0    ...]
  [8    ...]]

 [[4    ...]
  [12   ...]]]

The actual code that does the transposing is written in C and can be found here.

也可以使用swapaxes方法

1
2
3
arr
arr.swapaxes(1, 2)
arr
array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7]],

       [[ 8,  9, 10, 11],
        [12, 13, 14, 15]]])






array([[[ 0,  4],
        [ 1,  5],
        [ 2,  6],
        [ 3,  7]],

       [[ 8, 12],
        [ 9, 13],
        [10, 14],
        [11, 15]]])






array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7]],

       [[ 8,  9, 10, 11],
        [12, 13, 14, 15]]])

通用函数:快速的元素级数组函数

1
2
3
arr = np.arange(10)
np.sqrt(arr)
np.exp(arr)
array([ 0.    ,  1.    ,  1.4142,  1.7321,  2.    ,  2.2361,  2.4495,
        2.6458,  2.8284,  3.    ])






array([    1.    ,     2.7183,     7.3891,    20.0855,    54.5982,
         148.4132,   403.4288,  1096.6332,  2980.958 ,  8103.0839])
1
2
3
4
5
x = randn(8)
y = randn(8)
x
y
np.maximum(x, y) # 对应元素进行比较
array([ 0.811 , -0.0214, -0.3702, -0.4856,  1.1449, -0.4246,  0.9396,
        0.0382])






array([-1.223 ,  0.3271, -1.7197, -2.2636, -0.1154, -1.4122, -0.0989,
        0.4477])






array([ 0.811 ,  0.3271, -0.3702, -0.4856,  1.1449, -0.4246,  0.9396,
        0.4477])

modf函数挺有意思:它把数组的小数部分和整数部分拆成两个独立的数组返回

1
2
3
arr = randn(7) * 5
arr
np.modf(arr)
array([ 10.3171,  -4.733 ,  -6.3358,   3.2457,  -7.3823,   2.7036,  -2.6173])






(array([ 0.3171, -0.733 , -0.3358,  0.2457, -0.3823,  0.7036, -0.6173]),
 array([ 10.,  -4.,  -6.,   3.,  -7.,   2.,  -2.]))

利用数组进行数据处理

np.meshgrid接受两个一维数组,产生两个二维数组,对应于这两个数组中所有的(x, y)二元组

1
2
3
4
points = np.arange(-5, 5, 0.01) # 1000 equally spaced points
xs, ys = np.meshgrid(points, points)
xs
ys
array([[-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
       [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
       [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
       ..., 
       [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
       [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
       [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99]])






array([[-5.  , -5.  , -5.  , ..., -5.  , -5.  , -5.  ],
       [-4.99, -4.99, -4.99, ..., -4.99, -4.99, -4.99],
       [-4.98, -4.98, -4.98, ..., -4.98, -4.98, -4.98],
       ..., 
       [ 4.97,  4.97,  4.97, ...,  4.97,  4.97,  4.97],
       [ 4.98,  4.98,  4.98, ...,  4.98,  4.98,  4.98],
       [ 4.99,  4.99,  4.99, ...,  4.99,  4.99,  4.99]])
1
from matplotlib.pyplot import imshow, title
1
2
3
4
5
import matplotlib.pyplot as plt
z = np.sqrt(xs ** 2 + ys ** 2)
z
plt.imshow(z, cmap=plt.cm.gray); plt.colorbar()
plt.title("Image plot of $\sqrt{x^2 + y^2}$ for a grid of values")
array([[ 7.0711,  7.064 ,  7.0569, ...,  7.0499,  7.0569,  7.064 ],
       [ 7.064 ,  7.0569,  7.0499, ...,  7.0428,  7.0499,  7.0569],
       [ 7.0569,  7.0499,  7.0428, ...,  7.0357,  7.0428,  7.0499],
       ..., 
       [ 7.0499,  7.0428,  7.0357, ...,  7.0286,  7.0357,  7.0428],
       [ 7.0569,  7.0499,  7.0428, ...,  7.0357,  7.0428,  7.0499],
       [ 7.064 ,  7.0569,  7.0499, ...,  7.0428,  7.0499,  7.0569]])






<matplotlib.image.AxesImage at 0x23400a22b38>






<matplotlib.colorbar.Colorbar at 0x23400a7c7b8>






<matplotlib.text.Text at 0x23400a03da0>

meshgrid

1
plt.draw()
<matplotlib.figure.Figure at 0x23401396eb8>

将条件逻辑表述为数组运算

1
2
3
xarr = np.array([1.1, 1.2, 1.3, 1.4, 1.5])
yarr = np.array([2.1, 2.2, 2.3, 2.4, 2.5])
cond = np.array([True, False, True, True, False])

注意下面列表生成式的写法

1
2
3
result = [(x if c else y)
for x, y, c in zip(xarr, yarr, cond)]
result
[1.1000000000000001, 2.2000000000000002, 1.3, 1.3999999999999999, 2.5]

上述方法具有一些缺点:

  1. 大数组处理速度慢(纯Python实现)
  2. 无法处理多维数组

所以可以使用下面这种方法:

1
2
result = np.where(cond, xarr, yarr)
result
array([ 1.1,  2.2,  1.3,  1.4,  2.5])
1
2
3
4
arr = randn(4, 4)
arr
np.where(arr > 0, 2, -2)
np.where(arr > 0, 2, arr) # set only positive values to 2
array([[-0.7355, -0.3188, -0.2358,  0.3137],
       [-0.6196, -0.5803, -0.5504, -1.1508],
       [ 0.1719, -1.1599, -0.7115,  1.7869],
       [-0.2306,  0.2068,  1.5366,  1.6154]])






array([[-2, -2, -2,  2],
       [-2, -2, -2, -2],
       [ 2, -2, -2,  2],
       [-2,  2,  2,  2]])






array([[-0.7355, -0.3188, -0.2358,  2.    ],
       [-0.6196, -0.5803, -0.5504, -1.1508],
       [ 2.    , -1.1599, -0.7115,  2.    ],
       [-0.2306,  2.    ,  2.    ,  2.    ]])

显然where还可以应用于更复杂的操作。
考虑下面这种逻辑:

1
2
3
4
5
6
7
8
9
10
result = []
for i in range(n):
if cond1[i] and cond2[i]:
result.append(0)
elif cond1[i]:
result.append(1)
elif cond2[i]:
result.append(2)
else:
result.append(3)

where可以这样实现:

1
2
3
np.where(cond1 & cond2, 0,
np.where(cond1, 1,
np.where(cond2, 2, 3)))

更加magic一点(注意:对布尔数组取反应使用~而不是-;另外当cond1和cond2同时为True时,下式得到3而不是0,与上面的逻辑并不完全等价):

1
result = 1 * cond1 + 2 * cond2 + 3 * -(cond1 | cond2)

数学和统计方法

1
2
3
4
5
6
arr = np.random.randn(5, 4) # 正态分布
arr
# 下面两种方式都可以使用
arr.mean()
np.mean(arr)
arr.sum()
array([[ 1.4513, -0.8225,  0.7011, -0.617 ],
       [ 1.5872,  1.2937,  1.0151,  0.7123],
       [-0.2012, -0.0168, -0.3847,  0.5274],
       [-0.6312, -0.2762,  0.4869,  0.0462],
       [-0.5268, -1.1071,  1.8642,  0.2282]])






0.26650934393195791






0.26650934393195791






5.3301868786391582
1
2
arr.mean(axis=1)
arr.sum(0) # axis=0
array([ 0.1782,  1.1521, -0.0188, -0.0936,  0.1146])






array([ 1.6793, -0.9289,  3.6826,  0.8971])
1
2
3
4
arr = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]])
arr
arr.cumsum(0) # axis=0
arr.cumprod(1) # axis=1
array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])






array([[ 0,  1,  2],
       [ 3,  5,  7],
       [ 9, 12, 15]], dtype=int32)






array([[  0,   0,   0],
       [  3,  12,  60],
       [  6,  42, 336]], dtype=int32)

用于布尔数组的方法

1
2
3
arr = randn(100)
arr
(arr > 0).sum() # Number of positive values
array([ 0.7828,  0.1372, -0.6264,  1.8927, -0.2104,  0.2822, -0.3672,
       -0.3601,  0.5918,  0.9285,  0.1808, -0.4021,  0.4086, -0.2949,
        0.5633, -0.7462, -0.1635,  0.1482, -0.3226, -1.2127, -0.9821,
        0.0536, -0.1772, -0.4714, -0.9002, -0.0037,  0.7352,  0.5675,
       -1.1612,  0.5288,  0.3319,  0.7315,  0.6841, -0.6881,  1.5654,
       -0.4605, -0.5423,  0.0184, -0.8153, -0.1313,  0.4594,  0.0228,
        0.255 , -2.2361,  0.8703, -1.5153, -0.9458,  0.2769,  0.9986,
        0.7699, -0.7948, -1.2508,  1.7059,  0.1805, -1.0265, -0.0181,
       -0.9415,  0.1265, -0.2576,  0.6791,  0.3969,  0.8027, -0.6792,
       -0.7487, -1.9949, -0.9595,  0.5706, -0.5727, -1.0204,  0.1521,
       -0.9755, -0.4094,  0.67  ,  0.212 ,  0.4081, -0.1435,  0.3964,
       -0.1865, -0.6018, -2.6185, -0.5073, -0.6328, -0.2631,  0.6637,
       -0.5586,  1.3346, -0.5317,  0.8572,  1.1159,  0.9563, -0.0434,
       -1.0534,  0.5869,  0.0502, -0.0479, -0.8673,  0.1531,  1.0646,
       -0.2624, -0.3726])






47
1
2
3
4
bools = np.array([False, False, True, False])
bools
bools.any()
bools.all()
array([False, False,  True, False], dtype=bool)






True






False

排序

1
2
3
4
arr = randn(8)
arr
arr.sort()
arr
array([ 1.0584,  1.9062, -0.2923,  0.7169,  0.5186, -0.6089, -2.0444,
       -0.5661])






array([-2.0444, -0.6089, -0.5661, -0.2923,  0.5186,  0.7169,  1.0584,
        1.9062])
1
2
3
4
arr = randn(5, 3)
arr
arr.sort(1) # axis=1
arr
array([[ 0.0118, -2.8916, -0.4477],
       [-1.9768,  1.859 , -1.128 ],
       [-2.6262,  0.5791,  0.7594],
       [-0.5254, -0.9059,  0.0203],
       [-1.4029, -1.8566,  0.1892]])






array([[-2.8916, -0.4477,  0.0118],
       [-1.9768, -1.128 ,  1.859 ],
       [-2.6262,  0.5791,  0.7594],
       [-0.9059, -0.5254,  0.0203],
       [-1.8566, -1.4029,  0.1892]])
1
2
3
4
5
large_arr = randn(1000)
large_arr
large_arr.sort()
large_arr
large_arr[int(0.05 * len(large_arr))] # 5% quantile
array([ 1.2296,  0.3794, -0.1526,  2.1223, -0.0675,  0.6867, -0.5742,
       -1.4291,  0.6856,  0.1364, -0.3966, -0.7793,  0.4965,  0.2447,
       -0.7487,  0.7695,  0.5358, -0.4813,  0.9949, -0.6489, -0.3656,
        1.9551,  0.8327,  1.497 , -0.4431, -0.8357, -0.821 , -0.7348,
        1.9294, -0.3144,  0.1396, -0.9111,  0.0943,  0.8043,  1.067 ,
        0.9362, -2.2574,  0.7475, -1.0152, -1.1234, -0.3774,  1.076 ,
        0.8743,  1.1864,  0.0801,  0.3995,  0.2536, -0.9371, -1.669 ,
       -2.2444,  1.2544,  1.0539, -0.7579,  0.2963,  0.7496, -1.3655,
        0.1552, -0.6259,  0.2621, -1.5415, -0.1036, -0.5794,  1.2098,
        1.3388,  0.3159,  1.0998,  0.5109, -0.3927,  1.4797, -1.4891,
        0.3624,  0.966 ,  0.0756, -0.4703,  0.1859,  1.6091,  0.662 ,
       -0.4808,  0.8744,  0.4738,  1.1351,  0.0251, -1.017 , -0.849 ,
       -0.1602, -1.5392,  0.0601,  1.7323,  1.1837,  0.4657,  0.8858,
       -0.211 ,  0.1865,  0.673 ,  0.3086, -1.2527, -0.7802,  0.407 ,
       -1.118 , -0.2058,  0.7921,  0.5284, -2.3038, -0.4038, -1.1087,
       -0.827 , -2.6518,  0.3711, -0.0244,  1.1103,  0.2748, -0.7962,
        1.9456,  0.5347,  0.1862, -0.3734, -0.3036,  0.6831, -0.9419,
        1.4848, -0.1247, -0.4138, -0.601 ,  0.6138,  1.1334,  0.4386,
        0.0466, -0.0588,  0.6883, -1.2912, -0.2381,  0.3934,  0.2132,
       -0.4143,  1.0844, -0.5258, -0.9944,  1.0977,  0.3528,  1.9928,
        1.421 ,  0.8634,  0.1973, -1.1799, -2.9433,  2.697 ,  0.4778,
        0.6464,  0.049 , -0.2339,  1.6945, -0.6568, -0.5972, -0.8324,
       -0.6443,  0.0882, -0.3686,  0.0419,  0.5119, -0.641 ,  1.1545,
        1.0735, -0.5329, -0.1126,  0.0375, -1.0699, -1.3153, -1.6097,
        2.5671, -0.9516, -0.388 , -0.0129, -0.0171, -1.0763, -0.7125,
        0.767 ,  0.2254, -0.7638, -0.2065,  1.2797,  0.0784, -0.7762,
        1.7106, -0.0136, -0.4435,  1.2946, -2.5489,  0.4241,  0.5675,
       -0.7596,  0.6128,  1.1161, -1.2456, -0.131 , -0.2684,  1.6461,
       -0.2497, -0.4294,  1.122 ,  0.5969,  0.3335, -0.0453,  1.1567,
        0.0216, -0.7277, -2.5465, -2.4542, -1.5895,  0.4607, -0.8303,
        0.0263,  0.0301, -1.2365, -0.146 , -0.8632,  0.6449,  0.1958,
       -0.6914, -0.3223,  0.4037,  0.9918, -0.3542,  0.8442,  0.7751,
       -1.6248,  2.6081,  0.3524,  1.5298,  0.4421,  1.5228, -1.5263,
       -1.3994,  0.0285, -0.5389,  1.4047, -2.1117, -1.0397,  0.6495,
        0.9073,  1.8738,  0.2913, -1.069 , -0.7835, -0.6437,  0.6739,
        0.3272, -0.8483, -0.2971,  0.2882,  0.1778, -0.6705, -1.4129,
       -0.1935,  0.6615, -0.4423, -1.2472, -0.9816,  0.927 , -2.2774,
        0.5736,  1.3996,  1.1653, -0.3253, -0.2074, -0.2447,  0.4925,
        1.8415, -1.1551, -0.5131, -0.6407,  0.5033, -0.817 ,  0.0479,
       -0.9106,  1.4391, -1.5824, -0.4652,  1.253 , -0.6051,  0.6699,
        0.3803,  1.0767,  1.5449,  0.106 , -0.7215, -0.354 ,  0.1016,
       -1.3191, -0.6596, -0.9632, -0.3655,  0.8411, -0.2314,  1.9493,
       -0.6966, -1.2598,  0.4023,  0.1704, -0.452 ,  1.5924,  0.381 ,
       -0.4731, -1.2467, -0.4264, -0.2298, -0.1792, -0.5009, -1.0032,
        1.0126,  0.5436,  1.1366, -1.0318,  1.3289,  0.3218, -0.2828,
        0.5597, -0.0213, -0.078 ,  0.7667, -0.3984, -1.0263, -0.5557,
       -2.0724, -0.9343, -0.6877,  1.0567, -0.605 ,  1.7923,  0.6351,
       -1.769 ,  0.4175,  0.8266,  0.3767, -0.1508, -0.4301, -0.3397,
        0.7248,  0.188 ,  1.1632, -1.0831, -0.5726, -0.475 ,  0.092 ,
       -0.1566,  1.9074, -1.4261,  1.8589, -0.7534, -1.0767, -0.2704,
        0.7567,  0.5903, -1.5612, -1.1097,  0.3504, -0.9086, -0.1691,
        0.6714, -0.6033,  1.8315, -0.8141,  0.5968, -0.408 , -1.1843,
        0.5146,  0.6201,  0.4293,  0.9797,  1.066 , -1.3325, -1.733 ,
        0.8545,  0.3993, -0.2041, -0.4624,  0.0272, -0.005 ,  0.9237,
       -0.5523,  0.9975, -0.4374,  0.1351, -0.6148,  0.3185,  0.0572,
       -0.3002,  0.0889, -0.0894, -0.5617, -2.0553, -0.2923,  0.7227,
        0.604 , -0.6623, -0.6126, -0.4991,  0.0923, -0.6982,  0.2099,
       -0.6853, -0.4752, -1.625 ,  0.0443,  2.5507, -1.1597,  0.3504,
       -0.7654, -1.4366, -1.3755,  0.3702, -1.7853, -0.7326, -1.2803,
       -0.6089, -0.4472,  0.462 ,  0.7799,  0.3141,  0.8064, -1.0487,
        0.7317, -0.2446,  0.3061,  0.1384, -0.572 , -0.0311,  0.3572,
       -0.6371, -0.2236,  0.0806,  0.6648, -0.148 , -0.2547,  1.3649,
       -0.1595,  1.3632, -0.8858,  1.1801,  0.5533,  2.3306,  0.2724,
        0.7073, -0.5605, -0.8849,  0.9533,  0.3683, -0.2901, -0.0453,
        0.1064,  1.3342, -0.7036,  0.7127,  1.2156,  0.9017,  1.2378,
       -1.1017,  1.0558,  1.4273,  0.7003,  1.1649,  0.0334,  0.3433,
       -0.3997, -0.1195,  1.3725, -0.3746,  0.8444,  0.961 , -0.2644,
        0.3245, -1.3583,  0.387 ,  1.2944,  0.0274, -0.5057,  0.15  ,
        0.6   , -0.5752,  0.3746,  1.7114, -0.0026, -0.1221, -0.8084,
       -0.9521, -0.6332,  0.7254,  1.7032, -0.0879,  0.3329, -1.9525,
       -0.7083, -0.4113,  1.163 ,  0.9018, -0.3667,  0.8419,  0.4417,
        0.2904,  0.1666,  1.3722, -0.4455, -1.4876,  0.4103,  2.3672,
        0.3569, -0.8546,  0.5152,  0.9623,  1.1777,  1.6789, -1.7793,
       -0.7797, -1.0923,  0.07  , -0.8974, -0.3151, -0.3675, -1.9851,
       -2.3352,  0.3566,  1.1929,  1.5275,  1.4349, -1.4742, -0.1913,
        1.5874, -0.7264, -0.5594,  0.3166, -0.9377, -0.6452,  0.394 ,
       -0.2238, -1.1239, -0.0324,  1.3866, -0.6174, -0.1301, -0.0328,
       -0.92  ,  1.8067,  0.2576, -0.5248,  0.4114,  0.1655, -0.1674,
        0.2743,  0.0835, -0.145 ,  1.1658,  1.2624,  0.0404,  2.0929,
        0.6047,  1.0317, -0.4956, -1.5666,  1.1729,  0.484 ,  0.955 ,
        1.0546,  0.0106,  0.5062,  0.3211,  0.8503,  0.4706,  1.9953,
       -0.9362,  0.6326, -0.3154,  1.4987, -0.1695,  1.0906, -0.686 ,
        0.2501, -0.316 ,  0.3032,  0.4873,  0.6402, -0.1209, -0.1857,
       -0.3707, -0.3082, -0.4769, -0.858 , -0.1521, -0.3403, -0.9853,
       -0.5049,  0.3338, -0.3197, -0.5789, -0.7124, -0.8867, -0.0228,
       -1.5519,  1.8517,  0.5229,  0.7613, -0.5586,  0.4827, -1.3011,
       -0.5284, -0.3806, -0.7719,  1.6304,  0.0375, -0.9122, -0.1006,
        0.382 ,  0.0969,  1.7784,  0.1831, -1.8866,  0.2996,  0.4778,
       -0.2491, -1.6537,  0.022 , -0.101 ,  0.5912, -0.2249, -1.1422,
       -0.6436, -1.4096, -0.7446,  0.8055,  1.0727,  0.2426, -0.8079,
       -1.4692,  0.062 , -0.4466,  0.3786, -2.0461,  0.7238, -1.6195,
        1.4005,  0.4881, -0.8161, -0.582 ,  0.3456,  1.2922,  0.2469,
        1.9035,  0.9072, -0.0729, -0.9424, -1.1129,  0.8922, -0.5628,
        1.6215, -0.7022,  0.8395, -0.3423,  0.6048, -0.248 ,  0.7411,
        0.3546,  0.6176, -0.8221, -0.338 , -2.1051, -1.0049, -0.0659,
        0.0917, -0.6661, -0.5234,  0.9574, -0.6316, -0.0047, -0.4773,
        0.1562, -0.116 , -1.6255, -0.9108, -1.4767, -0.7765, -1.7101,
        0.0557,  0.8112,  0.7382,  1.8806,  0.9239,  1.8638,  0.8426,
        0.0359,  0.2743,  1.9204,  1.2223,  0.4575, -0.3408,  0.3727,
        0.5036,  0.5392, -1.3331, -0.4008, -0.1341, -1.5197,  0.1923,
        0.2128,  1.1533, -1.4284, -0.7483, -0.4092,  1.2843, -0.4489,
       -0.6624,  0.9255, -0.0895,  0.3199, -0.2564, -0.1166, -1.4701,
        1.1799, -1.6238,  0.0508,  0.2312,  0.7322, -1.3623, -0.232 ,
       -0.2206,  0.566 ,  1.2411, -1.1563,  1.1777, -1.1481, -0.6716,
        0.4596, -0.2422, -0.8654,  0.4441,  0.1869,  1.4626,  0.7621,
        0.4249,  0.252 ,  0.632 ,  0.5626, -0.7925,  1.1995,  1.5665,
        0.6096,  0.4821, -0.7324, -0.7624,  1.858 , -0.8434, -0.4408,
        0.2011,  0.7552, -0.8955, -1.3255,  0.7022,  0.1507,  0.662 ,
       -1.2229,  0.5199,  0.9837, -0.3947, -0.5262, -1.0424, -1.4582,
        0.5126, -0.3606,  0.4427, -2.3922,  1.2784, -0.8382, -0.0198,
        1.2136, -0.4212, -0.7798, -1.3387, -0.7141,  0.9581, -0.8575,
       -0.2255,  0.8436, -2.2162,  0.0742,  0.9683, -0.3633, -0.0227,
       -1.2176,  1.1482, -0.6697,  0.9643, -1.2802, -0.3651, -1.29  ,
        0.851 ,  1.0167,  1.0011, -1.3014, -0.7205,  1.3621, -0.692 ,
        1.0637,  0.5637,  0.0851,  2.1514, -0.272 ,  0.3136,  0.2179,
        0.7035, -1.3028, -0.1032,  0.0611,  1.2002, -0.7346,  0.9991,
       -0.3747,  0.7908, -0.9573, -0.5114, -0.8607, -0.6711,  1.3335,
       -0.6671, -0.1687,  0.4601,  0.5747, -0.0767, -0.8428,  0.3372,
       -1.7756, -2.5264, -1.503 , -0.5669,  0.0167, -1.961 ,  0.8861,
        1.1902,  2.239 ,  0.2481,  0.7361, -1.1103,  0.8368, -1.0434,
        0.6809, -0.0839, -0.6972, -1.5492, -1.4129,  0.5889,  0.2138,
        1.7689, -0.4861, -0.1124,  0.2032,  1.0664, -0.369 ,  2.3793,
        0.4406, -1.1741,  1.0812,  1.3965, -0.149 ,  0.8793,  1.3494,
        1.2159, -0.0001,  1.1929, -0.1966, -0.1666,  1.7097, -0.4273,
        0.4831, -0.2411, -1.4517, -0.7317,  0.099 ,  1.7922,  0.2313,
       -0.5031, -0.0849,  0.7331, -0.1483, -0.8003,  1.1897,  0.031 ,
       -0.3624, -1.1133,  1.4647,  2.5653, -1.9536, -0.4528, -1.693 ,
        0.4847,  0.1368,  0.6859, -0.9872,  0.8425, -0.1492, -0.1335,
       -0.0229, -0.0903, -0.4381,  1.2552,  1.5763,  0.2375, -0.7597,
        0.0845,  0.0894, -1.6022, -0.1988,  0.3095, -1.0785, -1.6044,
       -0.4922,  0.4583,  0.3168, -2.0485, -1.2147, -0.2803, -0.2071,
        0.0767,  1.9544, -1.7648,  0.2873, -0.4029, -0.8128, -0.1081,
        0.0332,  2.5288,  0.9933,  0.4378, -0.8208, -0.2451,  0.3472,
       -0.2917,  2.0775,  1.7381, -0.467 , -0.8943, -1.4171, -0.3905,
        0.2591,  0.8118, -0.643 ,  1.0387,  0.0049,  1.7299, -0.6882,
       -1.4132, -1.0893,  0.4606, -1.546 , -2.87  ,  0.3492, -1.5968,
        0.9858,  0.1384, -0.6016, -0.9632, -0.9088,  0.3711,  1.3509,
        0.4601, -1.4963, -0.043 ,  0.5588,  0.2638, -1.1118, -0.9376,
       -0.9139,  0.6551,  0.4876, -1.7039, -0.2915,  0.3867, -0.1795,
        1.2298,  0.0893, -0.6019,  1.4109, -1.1918,  0.5009,  0.0157,
       -1.1307,  1.0407,  1.9742, -1.0377, -0.6151, -0.8398,  1.4096,
       -0.012 , -1.5323,  0.3323,  0.0539,  0.2383, -0.4059,  2.285 ,
        0.1536,  0.3838,  0.3623, -0.4326, -0.0975, -1.8119])






array([-2.9433, -2.87  , -2.6518, -2.5489, -2.5465, -2.5264, -2.4542,
       -2.3922, -2.3352, -2.3038, -2.2774, -2.2574, -2.2444, -2.2162,
       -2.1117, -2.1051, -2.0724, -2.0553, -2.0485, -2.0461, -1.9851,
       -1.961 , -1.9536, -1.9525, -1.8866, -1.8119, -1.7853, -1.7793,
       -1.7756, -1.769 , -1.7648, -1.733 , -1.7101, -1.7039, -1.693 ,
       -1.669 , -1.6537, -1.6255, -1.625 , -1.6248, -1.6238, -1.6195,
       -1.6097, -1.6044, -1.6022, -1.5968, -1.5895, -1.5824, -1.5666,
       -1.5612, -1.5519, -1.5492, -1.546 , -1.5415, -1.5392, -1.5323,
       -1.5263, -1.5197, -1.503 , -1.4963, -1.4891, -1.4876, -1.4767,
       -1.4742, -1.4701, -1.4692, -1.4582, -1.4517, -1.4366, -1.4291,
       -1.4284, -1.4261, -1.4171, -1.4132, -1.4129, -1.4129, -1.4096,
       -1.3994, -1.3755, -1.3655, -1.3623, -1.3583, -1.3387, -1.3331,
       -1.3325, -1.3255, -1.3191, -1.3153, -1.3028, -1.3014, -1.3011,
       -1.2912, -1.29  , -1.2803, -1.2802, -1.2598, -1.2527, -1.2472,
       -1.2467, -1.2456, -1.2365, -1.2229, -1.2176, -1.2147, -1.1918,
       -1.1843, -1.1799, -1.1741, -1.1597, -1.1563, -1.1551, -1.1481,
       -1.1422, -1.1307, -1.1239, -1.1234, -1.118 , -1.1133, -1.1129,
       -1.1118, -1.1103, -1.1097, -1.1087, -1.1017, -1.0923, -1.0893,
       -1.0831, -1.0785, -1.0767, -1.0763, -1.0699, -1.069 , -1.0487,
       -1.0434, -1.0424, -1.0397, -1.0377, -1.0318, -1.0263, -1.017 ,
       -1.0152, -1.0049, -1.0032, -0.9944, -0.9872, -0.9853, -0.9816,
       -0.9632, -0.9632, -0.9573, -0.9521, -0.9516, -0.9424, -0.9419,
       -0.9377, -0.9376, -0.9371, -0.9362, -0.9343, -0.92  , -0.9139,
       -0.9122, -0.9111, -0.9108, -0.9106, -0.9088, -0.9086, -0.8974,
       -0.8955, -0.8943, -0.8867, -0.8858, -0.8849, -0.8654, -0.8632,
       -0.8607, -0.858 , -0.8575, -0.8546, -0.849 , -0.8483, -0.8434,
       -0.8428, -0.8398, -0.8382, -0.8357, -0.8324, -0.8303, -0.827 ,
       -0.8221, -0.821 , -0.8208, -0.817 , -0.8161, -0.8141, -0.8128,
       -0.8084, -0.8079, -0.8003, -0.7962, -0.7925, -0.7835, -0.7802,
       -0.7798, -0.7797, -0.7793, -0.7765, -0.7762, -0.7719, -0.7654,
       -0.7638, -0.7624, -0.7597, -0.7596, -0.7579, -0.7534, -0.7487,
       -0.7483, -0.7446, -0.7348, -0.7346, -0.7326, -0.7324, -0.7317,
       -0.7277, -0.7264, -0.7215, -0.7205, -0.7141, -0.7125, -0.7124,
       -0.7083, -0.7036, -0.7022, -0.6982, -0.6972, -0.6966, -0.692 ,
       -0.6914, -0.6882, -0.6877, -0.686 , -0.6853, -0.6716, -0.6711,
       -0.6705, -0.6697, -0.6671, -0.6661, -0.6624, -0.6623, -0.6596,
       -0.6568, -0.6489, -0.6452, -0.6443, -0.6437, -0.6436, -0.643 ,
       -0.641 , -0.6407, -0.6371, -0.6332, -0.6316, -0.6259, -0.6174,
       -0.6151, -0.6148, -0.6126, -0.6089, -0.6051, -0.605 , -0.6033,
       -0.6019, -0.6016, -0.601 , -0.5972, -0.582 , -0.5794, -0.5789,
       -0.5752, -0.5742, -0.5726, -0.572 , -0.5669, -0.5628, -0.5617,
       -0.5605, -0.5594, -0.5586, -0.5557, -0.5523, -0.5389, -0.5329,
       -0.5284, -0.5262, -0.5258, -0.5248, -0.5234, -0.5131, -0.5114,
       -0.5057, -0.5049, -0.5031, -0.5009, -0.4991, -0.4956, -0.4922,
       -0.4861, -0.4813, -0.4808, -0.4773, -0.4769, -0.4752, -0.475 ,
       -0.4731, -0.4703, -0.467 , -0.4652, -0.4624, -0.4528, -0.452 ,
       -0.4489, -0.4472, -0.4466, -0.4455, -0.4435, -0.4431, -0.4423,
       -0.4408, -0.4381, -0.4374, -0.4326, -0.4301, -0.4294, -0.4273,
       -0.4264, -0.4212, -0.4143, -0.4138, -0.4113, -0.4092, -0.408 ,
       -0.4059, -0.4038, -0.4029, -0.4008, -0.3997, -0.3984, -0.3966,
       -0.3947, -0.3927, -0.3905, -0.388 , -0.3806, -0.3774, -0.3747,
       -0.3746, -0.3734, -0.3707, -0.369 , -0.3686, -0.3675, -0.3667,
       -0.3656, -0.3655, -0.3651, -0.3633, -0.3624, -0.3606, -0.3542,
       -0.354 , -0.3423, -0.3408, -0.3403, -0.3397, -0.338 , -0.3253,
       -0.3223, -0.3197, -0.316 , -0.3154, -0.3151, -0.3144, -0.3082,
       -0.3036, -0.3002, -0.2971, -0.2923, -0.2917, -0.2915, -0.2901,
       -0.2828, -0.2803, -0.272 , -0.2704, -0.2684, -0.2644, -0.2564,
       -0.2547, -0.2497, -0.2491, -0.248 , -0.2451, -0.2447, -0.2446,
       -0.2422, -0.2411, -0.2381, -0.2339, -0.232 , -0.2314, -0.2298,
       -0.2255, -0.2249, -0.2238, -0.2236, -0.2206, -0.211 , -0.2074,
       -0.2071, -0.2065, -0.2058, -0.2041, -0.1988, -0.1966, -0.1935,
       -0.1913, -0.1857, -0.1795, -0.1792, -0.1695, -0.1691, -0.1687,
       -0.1674, -0.1666, -0.1602, -0.1595, -0.1566, -0.1526, -0.1521,
       -0.1508, -0.1492, -0.149 , -0.1483, -0.148 , -0.146 , -0.145 ,
       -0.1341, -0.1335, -0.131 , -0.1301, -0.1247, -0.1221, -0.1209,
       -0.1195, -0.1166, -0.116 , -0.1126, -0.1124, -0.1081, -0.1036,
       -0.1032, -0.101 , -0.1006, -0.0975, -0.0903, -0.0895, -0.0894,
       -0.0879, -0.0849, -0.0839, -0.078 , -0.0767, -0.0729, -0.0675,
       -0.0659, -0.0588, -0.0453, -0.0453, -0.043 , -0.0328, -0.0324,
       -0.0311, -0.0244, -0.0229, -0.0228, -0.0227, -0.0213, -0.0198,
       -0.0171, -0.0136, -0.0129, -0.012 , -0.005 , -0.0047, -0.0026,
       -0.0001,  0.0049,  0.0106,  0.0157,  0.0167,  0.0216,  0.022 ,
        0.0251,  0.0263,  0.0272,  0.0274,  0.0285,  0.0301,  0.031 ,
        0.0332,  0.0334,  0.0359,  0.0375,  0.0375,  0.0404,  0.0419,
        0.0443,  0.0466,  0.0479,  0.049 ,  0.0508,  0.0539,  0.0557,
        0.0572,  0.0601,  0.0611,  0.062 ,  0.07  ,  0.0742,  0.0756,
        0.0767,  0.0784,  0.0801,  0.0806,  0.0835,  0.0845,  0.0851,
        0.0882,  0.0889,  0.0893,  0.0894,  0.0917,  0.092 ,  0.0923,
        0.0943,  0.0969,  0.099 ,  0.1016,  0.106 ,  0.1064,  0.1351,
        0.1364,  0.1368,  0.1384,  0.1384,  0.1396,  0.15  ,  0.1507,
        0.1536,  0.1552,  0.1562,  0.1655,  0.1666,  0.1704,  0.1778,
        0.1831,  0.1859,  0.1862,  0.1865,  0.1869,  0.188 ,  0.1923,
        0.1958,  0.1973,  0.2011,  0.2032,  0.2099,  0.2128,  0.2132,
        0.2138,  0.2179,  0.2254,  0.2312,  0.2313,  0.2375,  0.2383,
        0.2426,  0.2447,  0.2469,  0.2481,  0.2501,  0.252 ,  0.2536,
        0.2576,  0.2591,  0.2621,  0.2638,  0.2724,  0.2743,  0.2743,
        0.2748,  0.2873,  0.2882,  0.2904,  0.2913,  0.2963,  0.2996,
        0.3032,  0.3061,  0.3086,  0.3095,  0.3136,  0.3141,  0.3159,
        0.3166,  0.3168,  0.3185,  0.3199,  0.3211,  0.3218,  0.3245,
        0.3272,  0.3323,  0.3329,  0.3335,  0.3338,  0.3372,  0.3433,
        0.3456,  0.3472,  0.3492,  0.3504,  0.3504,  0.3524,  0.3528,
        0.3546,  0.3566,  0.3569,  0.3572,  0.3623,  0.3624,  0.3683,
        0.3702,  0.3711,  0.3711,  0.3727,  0.3746,  0.3767,  0.3786,
        0.3794,  0.3803,  0.381 ,  0.382 ,  0.3838,  0.3867,  0.387 ,
        0.3934,  0.394 ,  0.3993,  0.3995,  0.4023,  0.4037,  0.407 ,
        0.4103,  0.4114,  0.4175,  0.4241,  0.4249,  0.4293,  0.4378,
        0.4386,  0.4406,  0.4417,  0.4421,  0.4427,  0.4441,  0.4575,
        0.4583,  0.4596,  0.4601,  0.4601,  0.4606,  0.4607,  0.462 ,
        0.4657,  0.4706,  0.4738,  0.4778,  0.4778,  0.4821,  0.4827,
        0.4831,  0.484 ,  0.4847,  0.4873,  0.4876,  0.4881,  0.4925,
        0.4965,  0.5009,  0.5033,  0.5036,  0.5062,  0.5109,  0.5119,
        0.5126,  0.5146,  0.5152,  0.5199,  0.5229,  0.5284,  0.5347,
        0.5358,  0.5392,  0.5436,  0.5533,  0.5588,  0.5597,  0.5626,
        0.5637,  0.566 ,  0.5675,  0.5736,  0.5747,  0.5889,  0.5903,
        0.5912,  0.5968,  0.5969,  0.6   ,  0.604 ,  0.6047,  0.6048,
        0.6096,  0.6128,  0.6138,  0.6176,  0.6201,  0.632 ,  0.6326,
        0.6351,  0.6402,  0.6449,  0.6464,  0.6495,  0.6551,  0.6615,
        0.662 ,  0.662 ,  0.6648,  0.6699,  0.6714,  0.673 ,  0.6739,
        0.6809,  0.6831,  0.6856,  0.6859,  0.6867,  0.6883,  0.7003,
        0.7022,  0.7035,  0.7073,  0.7127,  0.7227,  0.7238,  0.7248,
        0.7254,  0.7317,  0.7322,  0.7331,  0.7361,  0.7382,  0.7411,
        0.7475,  0.7496,  0.7552,  0.7567,  0.7613,  0.7621,  0.7667,
        0.767 ,  0.7695,  0.7751,  0.7799,  0.7908,  0.7921,  0.8043,
        0.8055,  0.8064,  0.8112,  0.8118,  0.8266,  0.8327,  0.8368,
        0.8395,  0.8411,  0.8419,  0.8425,  0.8426,  0.8436,  0.8442,
        0.8444,  0.8503,  0.851 ,  0.8545,  0.8634,  0.8743,  0.8744,
        0.8793,  0.8858,  0.8861,  0.8922,  0.9017,  0.9018,  0.9072,
        0.9073,  0.9237,  0.9239,  0.9255,  0.927 ,  0.9362,  0.9533,
        0.955 ,  0.9574,  0.9581,  0.961 ,  0.9623,  0.9643,  0.966 ,
        0.9683,  0.9797,  0.9837,  0.9858,  0.9918,  0.9933,  0.9949,
        0.9975,  0.9991,  1.0011,  1.0126,  1.0167,  1.0317,  1.0387,
        1.0407,  1.0539,  1.0546,  1.0558,  1.0567,  1.0637,  1.066 ,
        1.0664,  1.067 ,  1.0727,  1.0735,  1.076 ,  1.0767,  1.0812,
        1.0844,  1.0906,  1.0977,  1.0998,  1.1103,  1.1161,  1.122 ,
        1.1334,  1.1351,  1.1366,  1.1482,  1.1533,  1.1545,  1.1567,
        1.163 ,  1.1632,  1.1649,  1.1653,  1.1658,  1.1729,  1.1777,
        1.1777,  1.1799,  1.1801,  1.1837,  1.1864,  1.1897,  1.1902,
        1.1929,  1.1929,  1.1995,  1.2002,  1.2098,  1.2136,  1.2156,
        1.2159,  1.2223,  1.2296,  1.2298,  1.2378,  1.2411,  1.253 ,
        1.2544,  1.2552,  1.2624,  1.2784,  1.2797,  1.2843,  1.2922,
        1.2944,  1.2946,  1.3289,  1.3335,  1.3342,  1.3388,  1.3494,
        1.3509,  1.3621,  1.3632,  1.3649,  1.3722,  1.3725,  1.3866,
        1.3965,  1.3996,  1.4005,  1.4047,  1.4096,  1.4109,  1.421 ,
        1.4273,  1.4349,  1.4391,  1.4626,  1.4647,  1.4797,  1.4848,
        1.497 ,  1.4987,  1.5228,  1.5275,  1.5298,  1.5449,  1.5665,
        1.5763,  1.5874,  1.5924,  1.6091,  1.6215,  1.6304,  1.6461,
        1.6789,  1.6945,  1.7032,  1.7097,  1.7106,  1.7114,  1.7299,
        1.7323,  1.7381,  1.7689,  1.7784,  1.7922,  1.7923,  1.8067,
        1.8315,  1.8415,  1.8517,  1.858 ,  1.8589,  1.8638,  1.8738,
        1.8806,  1.9035,  1.9074,  1.9204,  1.9294,  1.9456,  1.9493,
        1.9544,  1.9551,  1.9742,  1.9928,  1.9953,  2.0775,  2.0929,
        2.1223,  2.1514,  2.239 ,  2.285 ,  2.3306,  2.3672,  2.3793,
        2.5288,  2.5507,  2.5653,  2.5671,  2.6081,  2.697 ])






-1.5519406239259821

唯一化以及其他的集合逻辑

1
2
3
4
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
np.unique(names)
ints = np.array([3, 3, 3, 2, 2, 1, 1, 4, 4])
np.unique(ints)
array(['Bob', 'Joe', 'Will'], 
      dtype='<U4')






array([1, 2, 3, 4])
1
sorted(set(names))
['Bob', 'Joe', 'Will']

np.in1d用于测试一个数组中的每个元素是否存在于另一个数组中,返回一个布尔型数组(新版NumPy推荐使用功能相同的np.isin)

1
2
values = np.array([6, 0, 0, 3, 2, 5, 6])
np.in1d(values, [2, 3, 6])
array([ True, False, False,  True,  True, False,  True], dtype=bool)

用于数组的文件输入输出

将数组以二进制的形式保存到磁盘

1
2
arr = np.arange(10)
np.save('some_array', arr)
1
np.load('some_array.npy')
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

np.savez可以将多个数组保存到同一个.npz归档文件中(注意:np.savez本身不压缩,如需压缩存储请使用np.savez_compressed)

1
np.savez('array_archive.npz', a=arr[:4], b=arr)
1
2
3
arch = np.load('array_archive.npz')
arch['a']
arch['b']
array([0, 1, 2, 3])






array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

存取文本文件

1
!more ch04\array_ex.txt
0.580052,0.186730,1.040717,1.134411
0.194163,-0.636917,-0.938659,0.124094
-0.126410,0.268607,-0.695724,0.047428
-1.484413,0.004176,-0.744203,0.005487
2.302869,0.200131,1.670238,-1.881090
-0.193230,1.047233,0.482803,0.960334
1
2
arr = np.loadtxt('.\\ch04\\array_ex.txt', delimiter=',')
arr
array([[ 0.5801,  0.1867,  1.0407,  1.1344],
       [ 0.1942, -0.6369, -0.9387,  0.1241],
       [-0.1264,  0.2686, -0.6957,  0.0474],
       [-1.4844,  0.0042, -0.7442,  0.0055],
       [ 2.3029,  0.2001,  1.6702, -1.8811],
       [-0.1932,  1.0472,  0.4828,  0.9603]])

线性代数

1
2
3
4
5
x = np.array([[1., 2., 3.], [4., 5., 6.]])
y = np.array([[6., 23.], [-1, 7], [8, 9]])
x
y
x.dot(y) # equivalently np.dot(x, y)
array([[ 1.,  2.,  3.],
       [ 4.,  5.,  6.]])






array([[  6.,  23.],
       [ -1.,   7.],
       [  8.,   9.]])






array([[  28.,   64.],
       [  67.,  181.]])
1
np.dot(x, np.ones(3))
array([  6.,  15.])
1
np.random.seed(12345)
1
2
3
4
5
6
7
8
9
from numpy.linalg import inv, qr
X = randn(5, 5)
X
mat = X.T.dot(X)
mat
inv(mat)
mat.dot(inv(mat))
q, r = qr(mat) # QR分解
r
array([[-0.5031, -0.6223, -0.9212, -0.7262,  0.2229],
       [ 0.0513, -1.1577,  0.8167,  0.4336,  1.0107],
       [ 1.8249, -0.9975,  0.8506, -0.1316,  0.9124],
       [ 0.1882,  2.1695, -0.1149,  2.0037,  0.0296],
       [ 0.7953,  0.1181, -0.7485,  0.585 ,  0.1527]])






array([[ 4.2538, -1.0645,  1.4407,  0.9898,  1.7318],
       [-1.0645,  7.4431, -1.5585,  4.4972, -2.1367],
       [ 1.4407, -1.5585,  2.8126,  0.243 ,  1.2786],
       [ 0.9898,  4.4972,  0.243 ,  5.0897,  0.305 ],
       [ 1.7318, -2.1367,  1.2786,  0.305 ,  1.928 ]])






array([[ 0.4057, -0.1875, -0.0764,  0.1229, -0.541 ],
       [-0.1875,  2.462 ,  0.2537, -2.3367,  3.0984],
       [-0.0764,  0.2537,  0.5435, -0.2369,  0.0268],
       [ 0.1229, -2.3367, -0.2369,  2.4239, -2.9264],
       [-0.541 ,  3.0984,  0.0268, -2.9264,  4.8837]])






array([[ 1.,  0., -0., -0., -0.],
       [ 0.,  1., -0., -0., -0.],
       [ 0.,  0.,  1.,  0., -0.],
       [ 0., -0.,  0.,  1.,  0.],
       [ 0.,  0., -0.,  0.,  1.]])






array([[-5.0281,  2.7734, -2.8428, -1.0619, -3.0078],
       [ 0.    , -8.7212,  1.2925, -6.5614,  1.622 ],
       [ 0.    ,  0.    , -2.0873, -1.0487, -0.6291],
       [ 0.    ,  0.    ,  0.    , -1.408 , -0.955 ],
       [ 0.    ,  0.    ,  0.    ,  0.    ,  0.1537]])

随机数生成

1
2
samples = np.random.normal(size=(4, 4))
samples
array([[-0.5196,  1.297 ,  0.9062,  0.5809],
       [ 1.2233, -1.3301,  1.0483,  0.357 ],
       [-0.7935, -0.406 , -0.0096, -0.596 ],
       [ 1.3833, -0.2029, -1.0547, -0.9795]])
1
2
3
4
from random import normalvariate
N = 1000000
%timeit samples = [normalvariate(0, 1) for _ in range(N)]
%timeit np.random.normal(size=N)
1 loop, best of 3: 814 ms per loop
10 loops, best of 3: 28.4 ms per loop

可以看出,生成一百万个正态分布样本时,纯Python循环约需814 ms,而NumPy只需约28.4 ms,快了将近30倍

Example: 随机游走

1
2
3
4
5
6
7
8
9
import random
position = 0
walk = [position]
steps = 1000
for i in range(steps):
    step = 1 if random.randint(0, 1) else -1
    position += step
    walk.append(position)

通过numpy来实现上述过程

1
np.random.seed(12345)
1
2
3
4
nsteps = 1000
draws = np.random.randint(0, 2, size=nsteps)
steps = np.where(draws > 0, 1, -1)
walk = steps.cumsum()
1
2
3
4
import matplotlib.pyplot as plt
index = [x + 1 for x in range(len(walk))]
plt.plot(index, walk)
[<matplotlib.lines.Line2D at 0x234014e9f98>]

random_walk

1
2
walk.min()
walk.max()
-3






31
1
(np.abs(walk) >= 10).argmax() # the first index
37

一次模拟多个随机游走

1
2
3
4
5
6
nwalks = 5000 # 5000 random walk
nsteps = 1000
draws = np.random.randint(0, 2, size=(nwalks, nsteps)) # 0 or 1
steps = np.where(draws > 0, 1, -1)
walks = steps.cumsum(1)
walks
array([[ -1,   0,  -1, ...,  24,  23,  22],
       [ -1,   0,  -1, ..., -36, -37, -36],
       [  1,   2,   3, ..., -42, -41, -40],
       ..., 
       [  1,   0,  -1, ...,  48,  49,  50],
       [ -1,  -2,  -3, ..., -38, -39, -40],
       [ -1,   0,   1, ..., -48, -47, -48]], dtype=int32)
1
2
walks.max()
walks.min()
130






-117
1
2
3
hits30 = (np.abs(walks) >= 30).any(1)
hits30
hits30.sum() # Number that hit 30 or -30
array([ True,  True,  True, ...,  True,  True,  True], dtype=bool)






3412
1
2
crossing_times = (np.abs(walks[hits30]) >= 30).argmax(1)
crossing_times.mean()
497.04103165298943