Python numpy 라이브러리

넘파이 연습사이트

: https://www.machinelearningplus.com/python/101-numpy-exercises-python/

모듈 및 배열생성

import numpy as np

np.array( [ ] )

a = np.array([1, 2, 3, 4, 5, 6])

a = np.array([ [1, 2, 3], [4, 5, 6] ])

#리스트형으로 요소를 넣는다.

data = [2, 4, 6, 8, 10.5]

b = np.array(data)

# 2.0, 4.0, 6.0, 8.0, 10.5

b.dtype #float

# 하나의 자료형으로 요소 통일됨.

np.arange( )

c = np.arange(4)

# 0, 1, 2, 3

d = np.arange(2, 10, 2)

# 2, 4, 6, 8

np.linspace( )

np.linspace( 시작점, 끝점, 개수)

e = np.linspace(2, 8, 4)

# 2, 4, 6, 8

np.repeat( ) - 요소 반복

e = np.repeat(1,5)

# 1, 1, 1, 1, 1

np.tile( ) - 묶음 반복

np.tile([1,2,3],3)

# 1, 2, 3, 1, 2, 3, 1, 2, 3

np.repeat([1,2,3],3)

# 1, 1, 1, 2, 2, 2, 3, 3, 3

np.full( )

np.full( 모양, 값, dtype=)

e = np.full( (2,2), 5 )

# [ [5, 5], [5, 5] ]

.reshape( , ) / .shape

f= d.reshape(2,2)

# [2, 4], [6, 8]

# 나머지 값을 '-1'로 입력하면, 자동으로 계산되어 입력된다.

f.shape

# (2, 2)

특별한 형태의 배열

np.zeros(10)
: 값이 0이고 요소가 10개인 1차원 배열
np.zeros((3, 4))
: 값이 0인 3*4 배열
np.ones( )
: 값이 1인 ~ 배열
np.eye(5)
: 단위행렬 5*5
: 곱했을 때, 자기 자신이 나오는 행렬

배열의 타입 변환

배열.astype( type명 )

난수 배열 생성, 무작위 요소 섞기

np.random.rand()

# 0~1 사이 난수 생성

np.random.rand(3, 3)

# 0~1 사이 3*3 난수 생성

np.random.randint(10, size=(2,2))

# 0~9 사이(10은 포함되지 않음) 정수로 2*2 난수 배열 생성

np.random.shuffle()

# 배열 요소의 순서를 무작위로 섞음 (원본 배열이 직접 변경됨)

배열 붙이기

> a = np.arange(10).reshape(2,-1)

> b = np.repeat(1, 10).reshape(2,-1)

# Method 1:

np.concatenate([a, b], axis=0/1)

# Method 2:

np.vstack([a, b]) / np.hstack([a, b])

# Method 3:

np.r_[a, b] / np.c_[a, b]

> array([[0, 1, 2, 3, 4],

> [5, 6, 7, 8, 9],

> [1, 1, 1, 1, 1],

> [1, 1, 1, 1, 1]])

배열의 연산

arr1 = np.array([10, 20, 30, 40])

arr2 = np.array([1, 2, 3, 4])

사칙연산

arr1 + arr2

arr1 - arr2

arr1 * arr2

arr1 / arr2

arr2 * 2

arr2 ** 2

논리연산

arr1 > 1

# 각 요소에 대해서, 논리 결과값 True / False 도출

arr1[ arr1 > 20 ]

# True인 요소만 남김

arr1[ (arr1 > 20) & (arr1 < 30 ) ]

# 두 가지 이상의 수식을 포함할 경우, & 사용

배열의 통계 함수

arr3 = np.array([0, 1, 2, 3, 4])

합, 평균, 표준편차, 분산, 최대/최소

arr3.sum()

arr3.mean()

arr3.std()

arr3.var()

arr3.min()

arr3.max()

누적 합/곱

arr3.cumsum()

# 0, 1, 3, 6, 10

arr3.cumprod()

# 0, 0, 0, 0, 0 (첫 요소가 0이므로 누적 곱은 모두 0; 배열이 [1, 2, 3, 4]라면 1, 2, 6, 24)

백분위 수 가져오기

np.percentile(arr3, q=[25, 90])

행렬곱 / 전치행렬

행렬곱

np.dot(A, B)

A.dot(B)

A@B

전치행렬

np.transpose()

배열의 인덱싱과 슬라이싱

a1 = np.array([10, 20, 30, 40, 50])

a1[0]

# 10

a1[[0, 2, 4]]

# 10, 30, 50

a2 = np.array([[10, 20, 30], [40, 50, 60], [70, 80, 90]])

a2[1, 1] / a2[1][1]

# 50

a2[1]

# 40, 50, 60

a2[1] = [44, 55, 66]

a2[1]

# 44, 55, 66

a3 = np.array([10, 20, 30, 40, 50])

a3[:3]

# 10, 20, 30

a4 = np.array([[10, 20, 30], [40, 50, 60], [70, 80, 90]])

a4[0:2, 1:3]

# 주의: a4[0:2][1:3] 은 행을 두 번 슬라이싱하므로 [[40, 50, 60]] 이 되어 결과가 다르다.

# [20, 30], [50, 60]

조건에 맞는 index 찾기

np.where(arr의 조건)

index 출력

np.where( 조건식, 참일때값, 거짓일때값)

# arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

# out = np.where(arr%2==1, -1, arr)

# array([ 0, -1, 2, -1, 4, -1, 6, -1, 8, -1])

np.argmax(arr)

최대값에 해당하는 index 찾기

np.argmin(arr)

최소값에 해당하는 index 찾기

집합함수

np.unique(x) : 배열 내 중복된 원소 제거 후 유일한 원소를 정렬하여 반환

(1) np.intersect1d(x, y) : 두 개의 배열 x, y 의 교집합을 정렬하여 반환

(2) np.union1d(x, y) : 두 개의 배열 x, y의 합집합을 정렬하여 반환

(3) np.setdiff1d(x, y) : 첫번째 배열 x로 부터 두번째 배열 y를 뺀 차집합을 반환

(4) np.setxor1d(x, y) : 두 배열 x, y의 합집합에서 교집합을 뺀 대칭차집합을 반환

데이터셋 가져오기

url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'

iris = np.genfromtxt(url, delimiter=',', dtype='object')