프로그래밍 언어/Python

[Python] 주피터 노트북을 이용한 데이터 시각화 📈 (2)

めちゃくちゃ 開発者🦾 2022. 5. 14. 03:36

《 주피터 노트북을 활용한 데이터 시각화 》

 

 

[ 파이썬 내장 함수 split()를 이용한 문자열 분리 활용 예제 ]

 

① split() 함수를 통해 데이터의 년, 월, 일 분리하기

# Jupyter Notebook

# str.split(sep=None, maxsplit=-1)
#   sep      : delimiter string; the call returns the list of pieces of the
#              string found between delimiters
#   maxsplit : maximum number of splits to perform (default -1 = no limit)
date = '1907-10-01'
pieces = date.split('-')
print(pieces)  # the whole [year, month, day] list

# Show each component on its own line: year, then month, then day.
year, month, day = pieces
print(year)
print(month)
print(day)

############ [실행 결과] ############
['1907', '10', '01']
1907
10
01

 

② split() 함수를 통해 특정 월의 최고 기온만 시각화

# Jupyter Notebook

import csv

import matplotlib.pyplot as plt

# Collect the August readings from every year in seoul.csv and plot them.
# The file is opened in a context manager so the handle is closed as soon as
# the rows have been consumed (the original left it open).
# NOTE(review): no encoding is passed, so the platform default is used —
# confirm it matches how seoul.csv was saved.
with open('seoul.csv') as f:
    data = csv.reader(f)
    next(data)  # skip the CSV header row
    result = [
        float(row[-1])  # last column; the original notes call it the daily maximum temperature
        for row in data
        # skip rows whose last column is empty, then keep month '08' of any year
        if row[-1] != '' and row[0].split('-')[1] == '08'
    ]

plt.plot(result, 'hotpink')
plt.show()  # line plot of the collected August values

plt.show() 실행 결과

 

 

③ split() 함수를 통해 특정 월, 일의 최고 기온 시각화

# Jupyter Notebook

import csv

import matplotlib.pyplot as plt

# Collect the last-column value (maximum temperature, per the original notes)
# of every February 14th in seoul.csv and plot the series.
# The context manager closes the file once reading is done.
with open('seoul.csv') as f:
    data = csv.reader(f)
    next(data)  # skip the CSV header row
    result = []
    for row in data:
        if row[-1] == '':
            continue  # no reading recorded for this row
        date = row[0].split('-')  # split once: [year, month, day] as strings
        if date[1] == '02' and date[2] == '14':
            result.append(float(row[-1]))

# Optional: call plt.figure(dpi=300) before plotting for a higher-resolution image.
plt.plot(result, 'hotpink')
plt.show()  # one point per year that has a February 14th reading

plt.show() 실행 결과

 

 

④ split() 함수를 통해 특정 기간의 최고/저 기온 시각 데이터 비교

# Jupyter Notebook

import csv

import matplotlib.pyplot as plt

# Compare the last two columns (minimum and maximum temperature, per the
# original notes) for every February 14th from 1983 onward.
# Only the lower bound (1983) is enforced in code; the upper end of the range
# is simply wherever the data in seoul.csv stops.
# The context manager closes the file once reading is done.
with open('seoul.csv') as f:
    data = csv.reader(f)
    next(data)  # skip the CSV header row
    high = []
    low = []
    for row in data:
        if row[-1] == '' or row[-2] == '':
            continue  # need both readings to keep the two series aligned
        date = row[0].split('-')  # split once: [year, month, day] as strings
        if int(date[0]) >= 1983 and date[1] == '02' and date[2] == '14':
            high.append(float(row[-1]))
            low.append(float(row[-2]))

# Optional: call plt.figure(dpi=300) before plotting for a higher-resolution image.
plt.plot(high, 'hotpink')  # maximum temperatures drawn in hot pink
plt.plot(low, 'skyblue')   # minimum temperatures drawn in sky blue
plt.show()

plt.show() 실행 결과

 

 

⑤ plt.rc() 함수 사용 예제

# Jupyter Notebook

import csv

import matplotlib.pyplot as plt

# Gather the January 1st minimum/maximum temperatures for 1990-2010 and plot
# them with customised rc settings (line style, font, legend, title).
# The context manager closes seoul.csv once reading is done.
with open('seoul.csv') as f:
    data = csv.reader(f)
    next(data)  # skip the CSV header row
    high = []  # maximum temperatures (last column)
    low = []   # minimum temperatures (second-to-last column)
    for row in data:
        if row[-1] == '' or row[-2] == '':
            continue  # need both readings to keep the two series aligned
        date = row[0].split('-')  # [year, month, day] as strings
        if 1990 <= int(date[0]) <= 2010 and date[1] == '01' and date[2] == '01':
            high.append(float(row[-1]))
            low.append(float(row[-2]))

# plt.rcParams is a dictionary of runtime configuration values (it is indexed,
# not called); plt.rc(group, **kwargs) is the function that updates it.
plt.rcParams['axes.unicode_minus'] = False  # avoid a broken minus-sign glyph with this font

# plt.rc('lines', **kwargs) : configures how lines are drawn
# plt.rc('font', **font)    : configures font family, size, weight, ...
font = {'family' : 'Malgun Gothic',
        'weight' : 'bold',
        'size' : 13}

plt.rc('lines', lw=2, ls='dotted')  # line width and line style
plt.rc('font', **font)              # make Malgun Gothic the default font
# The title literal was mojibake in the source; restored to the intended Korean
# ("Temperature change on January 1st, 1990-2010").
plt.title('1990~2010년 사이 1월1일의 기온 변화')
plt.plot(high, 'hotpink', label = 'high')  # maximum series, hot pink, legend label 'high'
plt.plot(low, 'skyblue', label = 'low')    # minimum series, sky blue, legend label 'low'
plt.legend()  # show the legend
plt.show()    # render the figure

plt.show() 실행 결과

 

 

[ matplotlib์˜ ๊ตฌ์กฐ ] 

 - plt.plot() 함수를 실행 시키면 아래 3단계를 거쳐 그래프를 그리게 됨

  1. pyplot์„ ์ด์šฉํ•œ ๋ช…๋ น (์ฝ”๋“œ)
  2. ๊ฐ์ฒด๋“ค์˜ ์ƒ์„ฑ (Figure, Line, Axes)
  3. ๊ทธ๋ž˜ํ”„ ์‹œ๊ฐํ™”

 - 3번째 단계인 그래프 시각화를 위한 라이브러리가 matplotlib 이다.

 - matplotlib.pyplot์€ ํ˜ธ์ถœ์„ ์œ„ํ•œ API๋ฅผ ์ œ๊ณตํ•จ

 - matplotlib์€ ๊ทธ๋ž˜ํ”„ ์‹œ๊ฐํ™”๋ฅผ ์œ„ํ•ด Runtime Configuration Parameters(rcParams)๋ผ๋Š” ๋”•์…”๋Ÿฌ๋‹ˆ ๊ฐ’์„ ์‚ฌ์šฉ

 - 결론적으로 plt.rc() 함수를 통해 rcParams 딕셔너리 값을 변경함으로써 다양한 형태의 그래프 생성 가능

๋ฐฑ์—”๋“œ์—์„œ ์‹œ๊ฐํ™”๋ฅผ ํ• ๋•Œ rcParams๋ฅผ ํ† ๋Œ€๋กœ ๊ทธ๋ž˜ํ”„๋ฅผ ๊ทธ๋ฆผ

 

 

 

[ 히스토그램 그래프 사용 예제 ]

# Jupyter Notebook

# Line plot of every recorded maximum temperature in seoul.csv
# (the original notes describe the data as covering 1907-2018).
import csv

import matplotlib.pyplot as plt

# The context manager closes the file once reading is done.
with open('seoul.csv') as f:
    data = csv.reader(f)
    next(data)  # skip the CSV header row
    # keep every non-empty value from the last column
    result = [float(row[-1]) for row in data if row[-1] != '']

plt.figure(figsize = (10,2), dpi = 300)
plt.plot(result, 'r')
plt.show()  # even with a larger figure there are too many points to compare by eye

기존 그래프를 시각화 하게 되면 데이터의 양이 너무 많아 시각적으로 비교 불가능함

히스토그램 그래프를 사용하여 기온간 비교를 보다 명확하게 가능

 

 

① 히스토그램 그래프 생성 예제 1

# Jupyter Notebook

import matplotlib.pyplot as plt

# plt.hist(x) tallies the values in x (a tuple, list, ...) and draws the
# result as a histogram:
#   horizontal axis -> value classes (bins)
#   vertical axis   -> frequency (how many values fall in each class)
samples = [1,1,2,3,4,5,6,6,7,8,10]

plt.hist(samples)
plt.show()

plt.show() 실행 결과. 1이 두개 6이 두개있음을 확인 가능

 

 

② 히스토그램 그래프 생성 예제 2

# Example combining the random module with plt.hist
import random

import matplotlib.pyplot as plt

# Ten random integers, sorted ascending. randint(a, b) returns N with a <= N <= b.
# The collection is named `numbers` rather than `list` so the builtin `list`
# is not shadowed for the rest of the notebook session.
numbers = sorted(random.randint(1, 30) for _ in range(10))
print(numbers)

# hist(bins=...): controls the bins along the horizontal axis [default: 10]
#   int      -> number of bins
#   sequence -> explicit bin edges, including the left edge of the first bin
#               and the right edge of the last bin
#   str      -> one of the binning strategies supported by numpy.histogram_bin_edges
plt.hist(numbers, bins=10, label='bins:10')  # 10 bins
plt.hist(numbers, bins=30, label='bins:30')  # 30 bins, drawn over the same axes
plt.legend()
plt.show()

bins 값에 따라 같은 데이터라도 범위가 달라 다르게 보일 수 있음

 

 

③ 1907~2018년 사이 서울 최고 기온 데이터 히스토그램 표시

import csv

import matplotlib.pyplot as plt

# Histogram of every recorded maximum temperature (last column) in seoul.csv.
# The context manager closes the file once reading is done.
with open('seoul.csv') as f:
    data = csv.reader(f)
    next(data)  # skip the CSV header row
    # keep every non-empty value from the last column
    result = [float(row[-1]) for row in data if row[-1] != '']

plt.figure(dpi = 300)
plt.hist(result, bins = 1000, color = 'r', histtype = 'stepfilled')
# To compare against a coarser binning, use bins = 300 in the call above instead.
plt.show()

(좌) bins=1000, (우) bins=300

 

 

④ 1907~2018년 사이 1월과 8월의 서울시 최고 기온 비교

import csv

import matplotlib.pyplot as plt

# Overlay histograms of the January and August maximum temperatures
# (last column of seoul.csv) across all years in the file.
# The context manager closes the file once reading is done.
with open('seoul.csv') as f:
    data = csv.reader(f)
    next(data)  # skip the CSV header row
    aug = []
    jan = []
    for row in data:
        month = row[0].split('-')[1]  # middle field of the first column
        if row[-1] == '':
            continue  # no maximum-temperature reading on this row
        # A row belongs to at most one month, so the branches are exclusive.
        if month == '08':
            aug.append(float(row[-1]))  # August reading
        elif month == '01':
            jan.append(float(row[-1]))  # January reading

plt.figure(dpi = 200)
plt.hist(aug, bins = 100, color = 'r', label = 'Aug')
plt.hist(jan, bins = 100, color = 'b', label = 'Jan')
plt.legend()
plt.show()

plt.show() 실행 결과