import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pandas import DataFrame, Series
import seaborn as sns


# Synthetic daily time series: 1000 draws from np.random.randn, indexed by
# 1000 consecutive dates starting at 2000-01-01.
ts = Series(
    data=np.random.randn(1000),
    index=pd.date_range(start='1/1/2000', periods=1000),
)
ts  # notebook-style echo of the series

2000-01-01    1.690774
2000-01-02    1.144960
2000-01-03    0.392472
2000-01-04   -0.262784
2000-01-05    0.218545
                ...   
2002-09-22   -1.490359
2002-09-23   -1.961708
2002-09-24    0.113210
2002-09-25    0.792010
2002-09-26    0.258762
Freq: D, Length: 1000, dtype: float64


# Replace the series with its running total, then draw it as a line chart.
ts = ts.cumsum()
ts.plot.line()

<AxesSubplot:>


# Same series drawn as a bar chart; the original author notes this takes a long time.
ts.plot.bar()

<AxesSubplot:>


# 100 evenly spaced sample points from 0 to 14 (both endpoints included).
x = np.linspace(start=0, stop=14, num=100)
x  # notebook-style echo of the array

array([ 0.        ,  0.14141414,  0.28282828,  0.42424242,  0.56565657,
        0.70707071,  0.84848485,  0.98989899,  1.13131313,  1.27272727,
        1.41414141,  1.55555556,  1.6969697 ,  1.83838384,  1.97979798,
        2.12121212,  2.26262626,  2.4040404 ,  2.54545455,  2.68686869,
        2.82828283,  2.96969697,  3.11111111,  3.25252525,  3.39393939,
        3.53535354,  3.67676768,  3.81818182,  3.95959596,  4.1010101 ,
        4.24242424,  4.38383838,  4.52525253,  4.66666667,  4.80808081,
        4.94949495,  5.09090909,  5.23232323,  5.37373737,  5.51515152,
        5.65656566,  5.7979798 ,  5.93939394,  6.08080808,  6.22222222,
        6.36363636,  6.50505051,  6.64646465,  6.78787879,  6.92929293,
        7.07070707,  7.21212121,  7.35353535,  7.49494949,  7.63636364,
        7.77777778,  7.91919192,  8.06060606,  8.2020202 ,  8.34343434,
        8.48484848,  8.62626263,  8.76767677,  8.90909091,  9.05050505,
        9.19191919,  9.33333333,  9.47474747,  9.61616162,  9.75757576,
        9.8989899 , 10.04040404, 10.18181818, 10.32323232, 10.46464646,
       10.60606061, 10.74747475, 10.88888889, 11.03030303, 11.17171717,
       11.31313131, 11.45454545, 11.5959596 , 11.73737374, 11.87878788,
       12.02020202, 12.16161616, 12.3030303 , 12.44444444, 12.58585859,
       12.72727273, 12.86868687, 13.01010101, 13.15151515, 13.29292929,
       13.43434343, 13.57575758, 13.71717172, 13.85858586, 14.        ])


# Element-wise sine of every sample point in x.
y = np.sin(x)
y  # notebook-style echo of the array

array([ 0.        ,  0.14094328,  0.27907268,  0.41163048,  0.53597023,
        0.64960951,  0.75027957,  0.83597055,  0.90497167,  0.95590534,
        0.98775469,  0.99988386,  0.99205069,  0.96441158,  0.91751832,
        0.85230712,  0.7700799 ,  0.67247831,  0.56145091,  0.43921435,
        0.30820902,  0.1710504 ,  0.03047682, -0.11070522, -0.24967706,
       -0.38366419, -0.5099916 , -0.6261372 , -0.72978219, -0.81885732,
       -0.89158426, -0.94651102, -0.98254101, -0.99895492, -0.99542503,
       -0.97202182, -0.92921254, -0.86785185, -0.78916479, -0.69472233,
       -0.58640998, -0.46639016, -0.33705902, -0.20099861, -0.06092533,
        0.0803643 ,  0.22004948,  0.35534146,  0.48353916,  0.60208317,
        0.7086068 ,  0.80098333,  0.87736852,  0.93623734,  0.9764145 ,
        0.99709789,  0.99787456,  0.97872901,  0.94004348,  0.8825903 ,
        0.8075165 ,  0.71632092,  0.61082424,  0.49313267,  0.36559587,
        0.23076008,  0.09131724, -0.04994872, -0.19021747, -0.32668859,
       -0.45663749, -0.57746977, -0.68677308, -0.78236519, -0.86233765,
       -0.92509384, -0.96938085, -0.99431451, -0.99939702, -0.98452691,
       -0.95000106, -0.89650877, -0.82511799, -0.73725402, -0.63467102,
       -0.51941703, -0.39379307, -0.26030716, -0.1216243 ,  0.01948674,
        0.16020873,  0.29773221,  0.42931158,  0.55231988,  0.66430131,
        0.76302019,  0.84650563,  0.91309089,  0.9614466 ,  0.99060736])


# Draw the same (x, y) curve once per seaborn style to show how the style
# setting changes the look of the figure.
for style_name in ('white', 'dark', 'whitegrid'):
    sns.set_style(style_name)
    plt.plot(x, y)
    plt.show()


# Build the 101 numbers 0, 1, ..., 100 as floats.
# np.linspace already returns a fresh ndarray, so the extra np.array(...)
# wrapper only made a redundant copy and has been removed.
xs = np.linspace(start=0, stop=100, num=101)
xs  # notebook-style echo of the array

array([  0.,   1.,   2.,   3.,   4.,   5.,   6.,   7.,   8.,   9.,  10.,
        11.,  12.,  13.,  14.,  15.,  16.,  17.,  18.,  19.,  20.,  21.,
        22.,  23.,  24.,  25.,  26.,  27.,  28.,  29.,  30.,  31.,  32.,
        33.,  34.,  35.,  36.,  37.,  38.,  39.,  40.,  41.,  42.,  43.,
        44.,  45.,  46.,  47.,  48.,  49.,  50.,  51.,  52.,  53.,  54.,
        55.,  56.,  57.,  58.,  59.,  60.,  61.,  62.,  63.,  64.,  65.,
        66.,  67.,  68.,  69.,  70.,  71.,  72.,  73.,  74.,  75.,  76.,
        77.,  78.,  79.,  80.,  81.,  82.,  83.,  84.,  85.,  86.,  87.,
        88.,  89.,  90.,  91.,  92.,  93.,  94.,  95.,  96.,  97.,  98.,
        99., 100.])


# First pass: wrap xs in a DataFrame without naming the column.
df = DataFrame(data=xs)
df


# Second pass: rebuild the frame with an explicit 'Features' column label.
df = DataFrame(data=xs, columns=['Features'])
df


# Show the data frame above as a box plot.
plt.figure(figsize=(7, 6))  # set the figure size
df.boxplot(column=['Features'])
y_tick_positions = np.arange(0, 101, 5)
plt.yticks(y_tick_positions)  # controls the spacing of the y-axis ticks
plt.show()


# Load the 'tips' example dataset through seaborn and peek at its first row.
tips = sns.load_dataset('tips')
tips.head(n=1)


# Use a box plot to look at the median and the outliers of the total bill.
plt.figure(figsize=(10, 4))
sns.boxplot(x=tips['total_bill'])
plt.show()


# Rows ordered from the largest total bill down; the result is only
# displayed, not stored.
tips.sort_values(by='total_bill', ascending=False)


# Next, look at the total bill broken down by day.
sns.boxplot(data=tips, x='day', y='total_bill')
plt.show()


# hue splits every box by a further column so the groups can be compared;
# draw that comparison for the total bill first and then for the tip.
for value_column in ('total_bill', 'tip'):
    sns.boxplot(data=tips, x='day', y=value_column, hue='smoker')
    plt.show()


# lmplot allows a multi-dimensional reading of the data.
sns.lmplot(data=tips, x='total_bill', y='tip')
plt.show()


# Adding hue makes comparison easier; the original author notes that a
# difference in how much is tipped becomes visible.
sns.lmplot(data=tips, x='total_bill', y='tip', hue='smoker')
plt.show()


# Load the 'flights' example dataset (year / month / passengers columns).
flights = sns.load_dataset('flights')
flights.head()


# Homework left by the original author: find out how groupby and
# pivot_table differ.
# Reshape into a grid with one row per month and one column per year.
flights = flights.pivot_table(values='passengers', index=['month'], columns=['year'])
flights.head()


# Draw the grid twice: first as a plain heatmap, then with annot=True so
# each cell's value is written on it and fmt='d' so those values are
# formatted as integers.
for heatmap_options in ({}, {'annot': True, 'fmt': 'd'}):
    plt.figure(figsize=(20, 16))
    sns.heatmap(flights, **heatmap_options)
    plt.show()

	total_bill	tip	sex	smoker	day	time	size
170	50.81	10.00	Male	Yes	Sat	Dinner	3
212	48.33	9.00	Male	No	Sat	Dinner	4
59	48.27	6.73	Male	No	Sat	Dinner	4
156	48.17	5.00	Male	No	Sun	Dinner	6
182	45.35	3.50	Male	Yes	Sun	Dinner	3
...	...	...	...	...	...	...	...
149	7.51	2.00	Male	No	Thur	Lunch	2
111	7.25	1.00	Female	No	Sat	Dinner	1
172	7.25	5.15	Male	Yes	Sun	Dinner	2
92	5.75	1.00	Female	Yes	Fri	Dinner	2
67	3.07	1.00	Female	Yes	Sat	Dinner	1

	year	month	passengers
0	1949	Jan	112
1	1949	Feb	118
2	1949	Mar	132
3	1949	Apr	129
4	1949	May	121

year	1949	1950	1951	1952	1953	1954	1955	1956	1957	1958	1959	1960
month
Jan	112	115	145	171	196	204	242	284	315	340	360	417
Feb	118	126	150	180	196	188	233	277	301	318	342	391
Mar	132	141	178	193	236	235	267	317	356	362	406	419
Apr	129	135	163	181	235	227	269	313	348	348	396	461
May	121	125	172	183	229	234	270	318	355	363	420	472

[DL] Pytorch 연구 모델 (0)	2021.01.18
[ML] SVM을 활용한 데이터 분석 (0)	2021.01.18
[Matplot] Matplot Lib를 이용한 시각화 (0)	2021.01.18
[Pandas][Numpy][seaborn] 2019년 서울 특별시 아파트 실거래 분석 및 표 만들기 (0)	2021.01.18
[Pandas][Numpy][seaborn] 2019년 서울 특별시 주유소 판매가격 분석 및 표 만들기 (0)	2021.01.18

[seaborn] seaborn을 이용한 시각화

1. sns의 기본¶

2. Boxplot¶

실전데이터 응용¶

3. lmplot¶

4. Heatmap¶

'workSpace > PYTHON' 카테고리의 다른 글

티스토리툴바

	0
0	0.0
1	1.0
2	2.0
3	3.0
4	4.0
...	...
96	96.0
97	97.0
98	98.0
99	99.0
100	100.0

	Features
0	0.0
1	1.0
2	2.0
3	3.0
4	4.0
...	...
96	96.0
97	97.0
98	98.0
99	99.0
100	100.0