import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import warnings
# Silence NumPy's complex-to-real casting warnings for the whole session.
# NumPy >= 2.0 only exposes ComplexWarning under np.exceptions, so fall back
# to that location when the top-level alias is missing.
_complex_warning = getattr(np, "ComplexWarning", None) or np.exceptions.ComplexWarning
warnings.simplefilter("ignore", _complex_warning)
from haversine import haversine
from IPython.display import HTML
import plotly.graph_objects as go
Stock on Graph
STGCN
Import
import rpy2
import rpy2.robjects as ro
from rpy2.robjects.vectors import FloatVector
from rpy2.robjects.packages import importr
Data intro(2020~2022)
Korea
# Load the KOSPI daily price history.
df_korea = pd.read_csv('./dataset/korea_kospi.csv')
# NOTE(review): `.mean` is referenced without being called, so this cell shows
# the bound method (see the recorded output) rather than the average opening
# price; use `.mean()` to obtain the value.
df_korea.loc[:, 'Open'].mean
<bound method NDFrame._add_numeric_operations.<locals>.mean of 0 2276.919922
1 2299.090088
2 2277.219971
3 2285.929932
4 2293.070068
...
1227 2580.889893
1228 2604.719971
1229 2593.469971
1230 2574.750000
1231 2547.409912
Name: Open, Length: 1232, dtype: float64>
# Tag every row with its country and add the z-scored closing price as 'Value'.
df_korea = pd.concat(
    [
        df_korea,
        pd.DataFrame({
            'Country': "Korea",
            'Value': (df_korea['Close'] - df_korea['Close'].mean()) / df_korea['Close'].std(),
        }),
    ],
    axis=1,
)
# Split the 'YYYY-MM-DD' date strings once and store the parts as floats.
_korea_ymd = df_korea['Date'].str.split('-', expand=True).astype(np.float64)
df_korea_add = df_korea.assign(Year=_korea_ymd[0], Mon=_korea_ymd[1], Day=_korea_ymd[2])
# Keep only the 2020-2022 window.
df_korea_covid = df_korea_add.query("Year>=2020 and Year <= 2022")
df_korea_covid
Date | Open | High | Low | Close | Adj Close | Volume | Country | Value | Year | Mon | Day | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
364 | 2020-01-02 | 2201.209961 | 2202.320068 | 2171.840088 | 2175.169922 | 2175.169922 | 494700 | Korea | -0.719054 | 2020.0 | 1.0 | 2.0 |
365 | 2020-01-03 | 2192.580078 | 2203.379883 | 2165.389893 | 2176.459961 | 2176.459961 | 631600 | Korea | -0.715806 | 2020.0 | 1.0 | 3.0 |
366 | 2020-01-06 | 2154.969971 | 2164.419922 | 2149.949951 | 2155.070068 | 2155.070068 | 592700 | Korea | -0.769651 | 2020.0 | 1.0 | 6.0 |
367 | 2020-01-07 | 2166.600098 | 2181.620117 | 2164.270020 | 2175.540039 | 2175.540039 | 568200 | Korea | -0.718122 | 2020.0 | 1.0 | 7.0 |
368 | 2020-01-08 | 2156.270020 | 2162.320068 | 2137.719971 | 2151.310059 | 2151.310059 | 913800 | Korea | -0.779116 | 2020.0 | 1.0 | 8.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1099 | 2022-12-23 | 2325.860107 | 2333.080078 | 2311.899902 | 2313.689941 | 2313.689941 | 367000 | Korea | -0.370356 | 2022.0 | 12.0 | 23.0 |
1100 | 2022-12-26 | 2312.540039 | 2321.919922 | 2304.199951 | 2317.139893 | 2317.139893 | 427600 | Korea | -0.361672 | 2022.0 | 12.0 | 26.0 |
1101 | 2022-12-27 | 2327.520020 | 2335.989990 | 2321.479980 | 2332.790039 | 2332.790039 | 448300 | Korea | -0.322276 | 2022.0 | 12.0 | 27.0 |
1102 | 2022-12-28 | 2296.449951 | 2296.449951 | 2276.899902 | 2280.449951 | 2280.449951 | 405700 | Korea | -0.454032 | 2022.0 | 12.0 | 28.0 |
1103 | 2022-12-29 | 2265.729980 | 2272.669922 | 2236.379883 | 2236.399902 | 2236.399902 | 361000 | Korea | -0.564919 | 2022.0 | 12.0 | 29.0 |
740 rows × 12 columns
# plt.figure(figsize=(30, 8))
# plt.title('Korea (close)')
# plt.xticks(rotation=45)
# plt.plot(df_korea_covid['Date'], df_korea_covid['Close'], 'co-')
# plt.grid(color='gray', linestyle='--')
US
# Load the NASDAQ daily price history.
df_us = pd.read_csv('./dataset/us_nasdaq.csv')
# Tag every row with its country and add the z-scored closing price as 'Value'.
df_us = pd.concat(
    [
        df_us,
        pd.DataFrame({
            'Country': 'United States of America',
            'Value': (df_us['Close'] - df_us['Close'].mean()) / df_us['Close'].std(),
        }),
    ],
    axis=1,
)
# Split the 'YYYY-MM-DD' date strings once and store the parts as floats.
_us_ymd = df_us['Date'].str.split('-', expand=True).astype(np.float64)
df_us_add = df_us.assign(Year=_us_ymd[0], Mon=_us_ymd[1], Day=_us_ymd[2])
# Keep only the 2020-2022 window.
df_us_covid = df_us_add.query("Year>=2020 and Year <=2022")
df_us_covid
Date | Open | High | Low | Close | Adj Close | Volume | Country | Value | Year | Mon | Day | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
374 | 2020-01-02 | 9039.459961 | 9093.429688 | 9010.889648 | 9092.190430 | 9092.190430 | 2862700000 | United States of America | -0.695392 | 2020.0 | 1.0 | 2.0 |
375 | 2020-01-03 | 8976.429688 | 9065.759766 | 8976.429688 | 9020.769531 | 9020.769531 | 2586520000 | United States of America | -0.722486 | 2020.0 | 1.0 | 3.0 |
376 | 2020-01-06 | 8943.500000 | 9072.410156 | 8943.500000 | 9071.469727 | 9071.469727 | 2810450000 | United States of America | -0.703253 | 2020.0 | 1.0 | 6.0 |
377 | 2020-01-07 | 9076.639648 | 9091.929688 | 9042.549805 | 9068.580078 | 9068.580078 | 2381740000 | United States of America | -0.704349 | 2020.0 | 1.0 | 7.0 |
378 | 2020-01-08 | 9068.030273 | 9168.889648 | 9059.379883 | 9129.240234 | 9129.240234 | 2472620000 | United States of America | -0.681336 | 2020.0 | 1.0 | 8.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1125 | 2022-12-23 | 10437.750000 | 10514.759766 | 10361.820313 | 10497.860352 | 10497.860352 | 3544680000 | United States of America | -0.162131 | 2022.0 | 12.0 | 23.0 |
1126 | 2022-12-27 | 10462.190430 | 10472.320313 | 10340.730469 | 10353.230469 | 10353.230469 | 3827290000 | United States of America | -0.216999 | 2022.0 | 12.0 | 27.0 |
1127 | 2022-12-28 | 10339.200195 | 10414.820313 | 10207.469727 | 10213.290039 | 10213.290039 | 3842970000 | United States of America | -0.270087 | 2022.0 | 12.0 | 28.0 |
1128 | 2022-12-29 | 10321.459961 | 10502.080078 | 10301.059570 | 10478.089844 | 10478.089844 | 4154100000 | United States of America | -0.169631 | 2022.0 | 12.0 | 29.0 |
1129 | 2022-12-30 | 10368.370117 | 10468.309570 | 10324.700195 | 10466.480469 | 10466.480469 | 3959030000 | United States of America | -0.174036 | 2022.0 | 12.0 | 30.0 |
756 rows × 12 columns
# plt.figure(figsize=(30, 8))
# plt.title('US (close)')
# plt.xticks(rotation=45)
# plt.plot(df_us_covid['Date'], df_us_covid['Close'], 'co-')
# plt.grid(color='gray', linestyle='--')
China
# Load the Shanghai Composite (SSEC) daily price history.
df_china = pd.read_csv('./dataset/china_ssec.csv')
# Tag every row with its country and add the z-scored closing price as 'Value'.
df_china = pd.concat(
    [
        df_china,
        pd.DataFrame({
            'Country': 'China',
            'Value': (df_china['Close'] - df_china['Close'].mean()) / df_china['Close'].std(),
        }),
    ],
    axis=1,
)
# Split the 'YYYY-MM-DD' date strings once and store the parts as floats.
_china_ymd = df_china['Date'].str.split('-', expand=True).astype(np.float64)
df_china_add = df_china.assign(Year=_china_ymd[0], Mon=_china_ymd[1], Day=_china_ymd[2])
# Keep only the 2020-2022 window.
df_china_covid = df_china_add.query("Year>=2020 and Year <=2022")
df_china_covid
Date | Open | High | Low | Close | Adj Close | Volume | Country | Value | Year | Mon | Day | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
361 | 2020-01-02 | 3066.335938 | 3098.100098 | 3066.335938 | 3085.197998 | 3085.197998 | 292500 | China | -0.244680 | 2020.0 | 1.0 | 2.0 |
362 | 2020-01-03 | 3089.021973 | 3093.819092 | 3074.518066 | 3083.785889 | 3083.785889 | 261500 | China | -0.249433 | 2020.0 | 1.0 | 3.0 |
363 | 2020-01-06 | 3070.908936 | 3107.202881 | 3065.309082 | 3083.407959 | 3083.407959 | 312600 | China | -0.250705 | 2020.0 | 1.0 | 6.0 |
364 | 2020-01-07 | 3085.488037 | 3105.450928 | 3084.329102 | 3104.802002 | 3104.802002 | 276600 | China | -0.178700 | 2020.0 | 1.0 | 7.0 |
365 | 2020-01-08 | 3094.239014 | 3094.239014 | 3059.131104 | 3066.893066 | 3066.893066 | 297900 | China | -0.306289 | 2020.0 | 1.0 | 8.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1084 | 2022-12-26 | 3048.196045 | 3071.835938 | 3047.349121 | 3065.562988 | 3065.562988 | 206500 | China | -0.310766 | 2022.0 | 12.0 | 26.0 |
1085 | 2022-12-27 | 3077.750000 | 3098.080078 | 3074.310059 | 3095.570068 | 3095.570068 | 222200 | China | -0.209771 | 2022.0 | 12.0 | 27.0 |
1086 | 2022-12-28 | 3088.620117 | 3098.649902 | 3079.429932 | 3087.399902 | 3087.399902 | 224600 | China | -0.237270 | 2022.0 | 12.0 | 28.0 |
1087 | 2022-12-29 | 3076.729980 | 3086.000000 | 3064.459961 | 3073.699951 | 3073.699951 | 215600 | China | -0.283379 | 2022.0 | 12.0 | 29.0 |
1088 | 2022-12-30 | 3084.520020 | 3096.310059 | 3082.199951 | 3089.260010 | 3089.260010 | 217500 | China | -0.231009 | 2022.0 | 12.0 | 30.0 |
728 rows × 12 columns
# plt.figure(figsize=(30, 8))
# plt.title('China (close)')
# plt.xticks(rotation=45)
# plt.plot(df_china_covid['Date'], df_china_covid['Close'], 'co-')
# plt.grid(color='gray', linestyle='--')
Japan
# Load the Nikkei 225 daily price history.
df_japan = pd.read_csv('./dataset/japan_n225.csv')
# Tag every row with its country and add the z-scored closing price as 'Value'.
df_japan = pd.concat(
    [
        df_japan,
        pd.DataFrame({
            'Country': 'Japan',
            'Value': (df_japan['Close'] - df_japan['Close'].mean()) / df_japan['Close'].std(),
        }),
    ],
    axis=1,
)
# Split the 'YYYY-MM-DD' date strings once and store the parts as floats.
_japan_ymd = df_japan['Date'].str.split('-', expand=True).astype(np.float64)
df_japan_add = df_japan.assign(Year=_japan_ymd[0], Mon=_japan_ymd[1], Day=_japan_ymd[2])
# Keep only the 2020-2022 window.
df_japan_covid = df_japan_add.query("Year>=2020 and Year<=2022")
df_japan_covid
Date | Open | High | Low | Close | Adj Close | Volume | Country | Value | Year | Mon | Day | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
367 | 2020-01-06 | 23319.759766 | 23365.359375 | 23148.529297 | 23204.859375 | 23204.859375 | 72800000.0 | Japan | -0.604658 | 2020.0 | 1.0 | 6.0 |
368 | 2020-01-07 | 23320.119141 | 23577.439453 | 23299.919922 | 23575.720703 | 23575.720703 | 64300000.0 | Japan | -0.496240 | 2020.0 | 1.0 | 7.0 |
369 | 2020-01-08 | 23217.490234 | 23303.210938 | 22951.179688 | 23204.759766 | 23204.759766 | 79400000.0 | Japan | -0.604687 | 2020.0 | 1.0 | 8.0 |
370 | 2020-01-09 | 23530.289063 | 23767.089844 | 23506.150391 | 23739.869141 | 23739.869141 | 62200000.0 | Japan | -0.448252 | 2020.0 | 1.0 | 9.0 |
371 | 2020-01-10 | 23813.279297 | 23903.289063 | 23761.080078 | 23850.570313 | 23850.570313 | 55900000.0 | Japan | -0.415890 | 2020.0 | 1.0 | 10.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1093 | 2022-12-26 | 26299.539063 | 26438.650391 | 26294.849609 | 26405.869141 | 26405.869141 | 47300000.0 | Japan | 0.331131 | 2022.0 | 12.0 | 26.0 |
1094 | 2022-12-27 | 26570.779297 | 26620.490234 | 26447.869141 | 26447.869141 | 26447.869141 | 50200000.0 | Japan | 0.343410 | 2022.0 | 12.0 | 27.0 |
1095 | 2022-12-28 | 26309.339844 | 26354.269531 | 26199.669922 | 26340.500000 | 26340.500000 | 61500000.0 | Japan | 0.312021 | 2022.0 | 12.0 | 28.0 |
1096 | 2022-12-29 | 26074.900391 | 26126.699219 | 25953.919922 | 26093.669922 | 26093.669922 | 63100000.0 | Japan | 0.239862 | 2022.0 | 12.0 | 29.0 |
1097 | 2022-12-30 | 26288.000000 | 26321.369141 | 26067.919922 | 26094.500000 | 26094.500000 | 52700000.0 | Japan | 0.240105 | 2022.0 | 12.0 | 30.0 |
731 rows × 12 columns
# plt.figure(figsize=(30, 8))
# plt.title('Japan (close)')
# plt.xticks(rotation=45)
# plt.plot(df_japan_covid['Date'], df_japan_covid['Close'], 'co-')
# plt.grid(color='gray', linestyle='--')
# with plt.style.context('seaborn-white'):
# fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2,figsize=(30,15))
# ax1.plot(df_korea_covid['Date'], df_korea_covid['Close'])
# ax2.plot(df_us_covid['Date'], df_us_covid['Close'])
# ax3.plot(df_china_covid['Date'], df_china_covid['Close'])
# ax4.plot(df_japan_covid['Date'], df_japan_covid['Close'])
# Stack the four per-country frames. Row labels are kept as-is, so index
# values repeat across countries.
df = pd.concat([df_korea_covid, df_us_covid, df_china_covid, df_japan_covid])
df
Date | Open | High | Low | Close | Adj Close | Volume | Country | Value | Year | Mon | Day | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
364 | 2020-01-02 | 2201.209961 | 2202.320068 | 2171.840088 | 2175.169922 | 2175.169922 | 494700.0 | Korea | -0.719054 | 2020.0 | 1.0 | 2.0 |
365 | 2020-01-03 | 2192.580078 | 2203.379883 | 2165.389893 | 2176.459961 | 2176.459961 | 631600.0 | Korea | -0.715806 | 2020.0 | 1.0 | 3.0 |
366 | 2020-01-06 | 2154.969971 | 2164.419922 | 2149.949951 | 2155.070068 | 2155.070068 | 592700.0 | Korea | -0.769651 | 2020.0 | 1.0 | 6.0 |
367 | 2020-01-07 | 2166.600098 | 2181.620117 | 2164.270020 | 2175.540039 | 2175.540039 | 568200.0 | Korea | -0.718122 | 2020.0 | 1.0 | 7.0 |
368 | 2020-01-08 | 2156.270020 | 2162.320068 | 2137.719971 | 2151.310059 | 2151.310059 | 913800.0 | Korea | -0.779116 | 2020.0 | 1.0 | 8.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1093 | 2022-12-26 | 26299.539063 | 26438.650391 | 26294.849609 | 26405.869141 | 26405.869141 | 47300000.0 | Japan | 0.331131 | 2022.0 | 12.0 | 26.0 |
1094 | 2022-12-27 | 26570.779297 | 26620.490234 | 26447.869141 | 26447.869141 | 26447.869141 | 50200000.0 | Japan | 0.343410 | 2022.0 | 12.0 | 27.0 |
1095 | 2022-12-28 | 26309.339844 | 26354.269531 | 26199.669922 | 26340.500000 | 26340.500000 | 61500000.0 | Japan | 0.312021 | 2022.0 | 12.0 | 28.0 |
1096 | 2022-12-29 | 26074.900391 | 26126.699219 | 25953.919922 | 26093.669922 | 26093.669922 | 63100000.0 | Japan | 0.239862 | 2022.0 | 12.0 | 29.0 |
1097 | 2022-12-30 | 26288.000000 | 26321.369141 | 26067.919922 | 26094.500000 | 26094.500000 | 52700000.0 | Japan | 0.240105 | 2022.0 | 12.0 | 30.0 |
2955 rows × 12 columns
# List the country labels present in the stacked stock frame.
df['Country'].unique()
array(['Korea', 'United States of America', 'China', 'Japan'],
dtype=object)
COVID(2020~2022)
# Load the WHO daily COVID-19 table and keep columns 0, 1, 2, 4 and 6
# (Date_reported, Country_code, Country, New_cases, New_deaths in the output).
covid = pd.read_csv('./dataset/WHO-COVID-19-global-data.csv').iloc[:, [0, 1, 2, 4, 6]]
covid
Date_reported | Country_code | Country | New_cases | New_deaths | |
---|---|---|---|---|---|
0 | 2020-01-03 | AF | Afghanistan | 0 | 0 |
1 | 2020-01-04 | AF | Afghanistan | 0 | 0 |
2 | 2020-01-05 | AF | Afghanistan | 0 | 0 |
3 | 2020-01-06 | AF | Afghanistan | 0 | 0 |
4 | 2020-01-07 | AF | Afghanistan | 0 | 0 |
... | ... | ... | ... | ... | ... |
303355 | 2023-07-01 | ZW | Zimbabwe | 0 | 0 |
303356 | 2023-07-02 | ZW | Zimbabwe | 0 | 0 |
303357 | 2023-07-03 | ZW | Zimbabwe | 80 | 2 |
303358 | 2023-07-04 | ZW | Zimbabwe | 0 | 0 |
303359 | 2023-07-05 | ZW | Zimbabwe | 0 | 0 |
303360 rows × 5 columns
# List the country names used by the WHO table.
covid['Country'].unique()
array(['Afghanistan', 'Albania', 'Algeria', 'American Samoa', 'Andorra',
'Angola', 'Anguilla', 'Antigua and Barbuda', 'Argentina',
'Armenia', 'Aruba', 'Australia', 'Austria', 'Azerbaijan',
'Bahamas', 'Bahrain', 'Bangladesh', 'Barbados', 'Belarus',
'Belgium', 'Belize', 'Benin', 'Bermuda', 'Bhutan',
'Bolivia (Plurinational State of)', 'Bonaire',
'Bosnia and Herzegovina', 'Botswana', 'Brazil',
'British Virgin Islands', 'Brunei Darussalam', 'Bulgaria',
'Burkina Faso', 'Burundi', 'Cabo Verde', 'Cambodia', 'Cameroon',
'Canada', 'Cayman Islands', 'Central African Republic', 'Chad',
'Chile', 'China', 'Colombia', 'Comoros', 'Congo', 'Cook Islands',
'Costa Rica', 'Côte d’Ivoire', 'Croatia', 'Cuba', 'Curaçao',
'Cyprus', 'Czechia', "Democratic People's Republic of Korea",
'Democratic Republic of the Congo', 'Denmark', 'Djibouti',
'Dominica', 'Dominican Republic', 'Ecuador', 'Egypt',
'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Estonia',
'Eswatini', 'Ethiopia', 'Falkland Islands (Malvinas)',
'Faroe Islands', 'Fiji', 'Finland', 'France', 'French Guiana',
'French Polynesia', 'Gabon', 'Gambia', 'Georgia', 'Germany',
'Ghana', 'Gibraltar', 'Greece', 'Greenland', 'Grenada',
'Guadeloupe', 'Guam', 'Guatemala', 'Guernsey', 'Guinea',
'Guinea-Bissau', 'Guyana', 'Haiti', 'Holy See', 'Honduras',
'Hungary', 'Iceland', 'India', 'Indonesia',
'Iran (Islamic Republic of)', 'Iraq', 'Ireland', 'Isle of Man',
'Israel', 'Italy', 'Jamaica', 'Japan', 'Jersey', 'Jordan',
'Kazakhstan', 'Kenya', 'Kiribati', 'Kosovo[1]', 'Kuwait',
'Kyrgyzstan', "Lao People's Democratic Republic", 'Latvia',
'Lebanon', 'Lesotho', 'Liberia', 'Libya', 'Liechtenstein',
'Lithuania', 'Luxembourg', 'Madagascar', 'Malawi', 'Malaysia',
'Maldives', 'Mali', 'Malta', 'Marshall Islands', 'Martinique',
'Mauritania', 'Mauritius', 'Mayotte', 'Mexico',
'Micronesia (Federated States of)', 'Monaco', 'Mongolia',
'Montenegro', 'Montserrat', 'Morocco', 'Mozambique', 'Myanmar',
'Namibia', 'Nauru', 'Nepal', 'Netherlands', 'New Caledonia',
'New Zealand', 'Nicaragua', 'Niger', 'Nigeria', 'Niue',
'North Macedonia',
'Northern Mariana Islands (Commonwealth of the)', 'Norway',
'occupied Palestinian territory, including east Jerusalem', 'Oman',
'Other', 'Pakistan', 'Palau', 'Panama', 'Papua New Guinea',
'Paraguay', 'Peru', 'Philippines', 'Pitcairn Islands', 'Poland',
'Portugal', 'Puerto Rico', 'Qatar', 'Republic of Korea',
'Republic of Moldova', 'Réunion', 'Romania', 'Russian Federation',
'Rwanda', 'Saba', 'Saint Barthélemy',
'Saint Helena, Ascension and Tristan da Cunha',
'Saint Kitts and Nevis', 'Saint Lucia', 'Saint Martin',
'Saint Pierre and Miquelon', 'Saint Vincent and the Grenadines',
'Samoa', 'San Marino', 'Sao Tome and Principe', 'Saudi Arabia',
'Senegal', 'Serbia', 'Seychelles', 'Sierra Leone', 'Singapore',
'Sint Eustatius', 'Sint Maarten', 'Slovakia', 'Slovenia',
'Solomon Islands', 'Somalia', 'South Africa', 'South Sudan',
'Spain', 'Sri Lanka', 'Sudan', 'Suriname', 'Sweden', 'Switzerland',
'Syrian Arab Republic', 'Tajikistan', 'Thailand',
'The United Kingdom', 'Timor-Leste', 'Togo', 'Tokelau', 'Tonga',
'Trinidad and Tobago', 'Tunisia', 'Türkiye', 'Turkmenistan',
'Turks and Caicos Islands', 'Tuvalu', 'Uganda', 'Ukraine',
'United Arab Emirates', 'United Republic of Tanzania',
'United States of America', 'United States Virgin Islands',
'Uruguay', 'Uzbekistan', 'Vanuatu',
'Venezuela (Bolivarian Republic of)', 'Viet Nam',
'Wallis and Futuna', 'Yemen', 'Zambia', 'Zimbabwe'], dtype=object)
# Inspect the distinct daily case counts reported under country code 'KP'.
covid.query("Country_code =='KP'")['New_cases'].unique()
array([0])
# Load the Korean COVID-19 sheet.
# NOTE(review): the path ends in .csv but is read with read_excel; the preview
# output has spreadsheet-style 'Unnamed: N' columns, so the file is presumably
# an Excel workbook with a misleading extension - confirm.
_korea_covid = pd.read_excel('./dataset/covid_korea.csv')
_korea_covid[:10]
Unnamed: 0 | Unnamed: 1 | Unnamed: 2 | Unnamed: 3 | Unnamed: 4 | |
---|---|---|---|---|---|
0 | NaN | NaN | NaN | NaN | NaN |
1 | NaN | NaN | NaN | NaN | NaN |
2 | NaN | NaN | NaN | NaN | NaN |
3 | 일자 | 계(명) | 국내발생(명) | 해외유입(명) | 사망(명) |
4 | 누적(명) | 32415857 | 32336444 | 79413 | 35111 |
5 | 2020-01-20 00:00:00 | 1 | - | 1 | - |
6 | 2020-01-21 00:00:00 | 0 | - | - | - |
7 | 2020-01-22 00:00:00 | 0 | - | - | - |
8 | 2020-01-23 00:00:00 | 0 | - | - | - |
9 | 2020-01-24 00:00:00 | 1 | - | 1 | - |
# Reshape the Korean sheet into the WHO column layout.
# - '-' placeholders in the count columns are replaced by 0.
# - The date keeps only the part before the first space ('YYYY-MM-DD').
# - iloc[5:] skips the blank, header and cumulative rows shown in the preview.
# - Columns are reordered to Date_reported, Country_code, Country,
#   New_deaths, New_cases.
# NOTE(review): 'Unnamed: 2' is the domestic-cases column in the preview
# header row; the total is in 'Unnamed: 1' - confirm which one is intended.
# NOTE(review): 'KP' is the ISO code of the Democratic People's Republic of
# Korea; the Republic of Korea is 'KR' - confirm before relying on the code.
korea_covid = _korea_covid.assign(
    New_cases=np.where(_korea_covid['Unnamed: 2'] == '-', 0, _korea_covid['Unnamed: 2']),
    New_deaths=np.where(_korea_covid['Unnamed: 4'] == '-', 0, _korea_covid['Unnamed: 4']),
    Date_reported=list(map(lambda x: str(x).split()[0], _korea_covid['Unnamed: 0'])),
    Country='Korea',
    Country_code='KP',
).iloc[5:, [7, 9, 8, 6, 5]]
korea_covid
Date_reported | Country_code | Country | New_deaths | New_cases | |
---|---|---|---|---|---|
5 | 2020-01-20 | KP | Korea | 0 | 0 |
6 | 2020-01-21 | KP | Korea | 0 | 0 |
7 | 2020-01-22 | KP | Korea | 0 | 0 |
8 | 2020-01-23 | KP | Korea | 0 | 0 |
9 | 2020-01-24 | KP | Korea | 0 | 0 |
... | ... | ... | ... | ... | ... |
1268 | 2023-07-06 | KP | Korea | 12 | 24339 |
1269 | 2023-07-07 | KP | Korea | 7 | 24757 |
1270 | 2023-07-08 | KP | Korea | 3 | 25659 |
1271 | 2023-07-09 | KP | Korea | 2 | 22560 |
1272 | 2023-07-10 | KP | Korea | 2 | 9253 |
1268 rows × 5 columns
# Append the Korean rows to the WHO table and rename the date column to
# 'Date'. Row labels are not reset, so index values repeat.
covid_final = pd.concat([covid, korea_covid]).rename(columns={'Date_reported': 'Date'})
covid_final
Date | Country_code | Country | New_cases | New_deaths | |
---|---|---|---|---|---|
0 | 2020-01-03 | AF | Afghanistan | 0 | 0 |
1 | 2020-01-04 | AF | Afghanistan | 0 | 0 |
2 | 2020-01-05 | AF | Afghanistan | 0 | 0 |
3 | 2020-01-06 | AF | Afghanistan | 0 | 0 |
4 | 2020-01-07 | AF | Afghanistan | 0 | 0 |
... | ... | ... | ... | ... | ... |
1268 | 2023-07-06 | KP | Korea | 24339 | 12 |
1269 | 2023-07-07 | KP | Korea | 24757 | 7 |
1270 | 2023-07-08 | KP | Korea | 25659 | 3 |
1271 | 2023-07-09 | KP | Korea | 22560 | 2 |
1272 | 2023-07-10 | KP | Korea | 9253 | 2 |
304628 rows × 5 columns
# Split the 'YYYY-MM-DD' date strings once; the parts stay as strings here.
# The pieces are passed as plain arrays so no index alignment is attempted
# (covid_final carries duplicate row labels after the concat).
_covid_ymd = covid_final['Date'].str.split('-', expand=True)
covid_final_add = covid_final.assign(
    Year=_covid_ymd[0].to_numpy(),
    Mon=_covid_ymd[1].to_numpy(),
    Day=_covid_ymd[2].to_numpy(),
)
covid_final_add
Date | Country_code | Country | New_cases | New_deaths | Year | Mon | Day | |
---|---|---|---|---|---|---|---|---|
0 | 2020-01-03 | AF | Afghanistan | 0 | 0 | 2020 | 01 | 03 |
1 | 2020-01-04 | AF | Afghanistan | 0 | 0 | 2020 | 01 | 04 |
2 | 2020-01-05 | AF | Afghanistan | 0 | 0 | 2020 | 01 | 05 |
3 | 2020-01-06 | AF | Afghanistan | 0 | 0 | 2020 | 01 | 06 |
4 | 2020-01-07 | AF | Afghanistan | 0 | 0 | 2020 | 01 | 07 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
1268 | 2023-07-06 | KP | Korea | 24339 | 12 | 2023 | 07 | 06 |
1269 | 2023-07-07 | KP | Korea | 24757 | 7 | 2023 | 07 | 07 |
1270 | 2023-07-08 | KP | Korea | 25659 | 3 | 2023 | 07 | 08 |
1271 | 2023-07-09 | KP | Korea | 22560 | 2 | 2023 | 07 | 09 |
1272 | 2023-07-10 | KP | Korea | 9253 | 2 | 2023 | 07 | 10 |
304628 rows × 8 columns
# with plt.style.context('seaborn-white'):
# fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2,figsize=(30,15))
# ax1.plot(covid_final_add.query("Country =='Korea'")['Date'], covid_final_add.query("Country =='Korea'")['New_cases'])
# ax2.plot(covid_final_add.query("Country =='United States of America'")['Date'], covid_final_add.query("Country =='United States of America'")['New_cases'])
# ax3.plot(covid_final_add.query("Country =='China'")['Date'], covid_final_add.query("Country =='China'")['New_cases'])
# ax4.plot(covid_final_add.query("Country =='Japan'")['Date'], covid_final_add.query("Country =='Japan'")['New_cases'])
define class
class Stock_graph:
    """Treat the rows of a stock table as a graph signal and smooth it spectrally.

    The input frame must provide ``Value``, ``Year``, ``Mon``, ``Day`` and
    ``Country`` columns. A weight matrix ``W`` (n x n, one node per row) must
    be available before :meth:`fit` is called, either assigned directly or
    built with :meth:`get_distance` + :meth:`get_weightmatrix`.
    """

    def __init__(self, df):
        """Store the frame and pull out the signal and date columns as arrays."""
        self.df = df
        self.f = df.Value.to_numpy()
        self.year = df.Year.to_numpy()
        self.mon = df.Mon.to_numpy()
        self.day = df.Day.to_numpy()
        self.country = df.Country
        self.n = len(self.f)
        self.theta = None
        # Coordinates are not derived from `df`; assign length-n arrays to
        # `lat` / `long` before calling get_distance().
        self.lat = None
        self.long = None

    def get_distance(self):
        """Fill ``self.D`` with pairwise haversine distances between nodes.

        Raises:
            AttributeError: if ``lat`` / ``long`` have not been assigned.
        """
        if getattr(self, "lat", None) is None or getattr(self, "long", None) is None:
            raise AttributeError(
                "Stock_graph.get_distance() needs `lat` and `long` to be "
                "assigned on the instance first"
            )
        locations = np.stack([self.lat, self.long], axis=1)
        self.D = np.zeros([self.n, self.n])
        for i in tqdm.tqdm(range(self.n)):
            # Only the strict upper triangle is computed; the diagonal stays 0.
            for j in range(i + 1, self.n):
                self.D[i, j] = haversine(locations[i], locations[j])
        # Mirror the upper triangle to obtain the full symmetric matrix.
        self.D = self.D + self.D.T

    def get_weightmatrix(self, theta=1, beta=0.5, kappa=4000):
        """Build ``self.W`` as a thresholded Gaussian kernel of ``self.D``.

        Args:
            theta: kernel bandwidth; also stored on ``self.theta``.
            beta: unused; kept so existing calls keep working.
            kappa: distance cut-off. Pairs with distance >= kappa get weight 0.
        """
        self.theta = theta
        dist = np.asarray(self.D, dtype=float)
        # Apply the cut-off to the weights, not to the distances: zeroing the
        # distance first would turn far-apart pairs into weight exp(0) == 1.
        self.W = np.where(dist < kappa, np.exp(-(dist / self.theta) ** 2), 0.0)

    def _eigen(self):
        """Eigendecompose the symmetric normalized Laplacian of ``self.W``.

        Sets ``self.L``, the eigenvalues ``self.lamb``, their diagonal matrix
        ``self.Lamb`` and the eigenvectors ``self.Psi`` (as columns).
        """
        W = np.asarray(self.W)
        d = W.sum(axis=1)
        # Nodes with zero degree get a zero scale instead of 1/0, so their
        # Laplacian row/column is 0 rather than NaN.
        inv_sqrt_d = np.zeros(len(d), dtype=float)
        connected = d > 0
        inv_sqrt_d[connected] = 1 / np.sqrt(d[connected])
        # Row/column scaling is the same as multiplying by diag(d**-0.5) on
        # both sides, without forming two extra dense matrices.
        self.L = inv_sqrt_d[:, None] * (np.diag(d) - W) * inv_sqrt_d[None, :]
        self.lamb, self.Psi = np.linalg.eigh(self.L)
        self.Lamb = np.diag(self.lamb)

    def fit(self, ref=0.5):  # fit with ebayesthresh
        """Threshold the signal in the graph Fourier domain and rebuild it.

        Requires the R package ``EbayesThresh`` through rpy2. Adds the columns
        ``ValueHat`` (reconstructed signal) and ``Residual`` (``Value`` minus
        ``ValueHat``) to ``self.df``.

        Args:
            ref: unused; kept so existing calls keep working.
        """
        self._eigen()
        self.fbar = self.Psi.T @ self.f  # fbar := graph fourier transform of f
        self.power = self.fbar ** 2
        ebayesthresh = importr('EbayesThresh').ebayesthresh
        self.power_threshed = np.array(ebayesthresh(FloatVector(self.power)))
        # Keep only the coefficients whose thresholded power is positive.
        self.fbar_threshed = np.where(self.power_threshed > 0, self.fbar, 0)
        self.fhat = self.Psi @ self.fbar_threshed
        self.df = self.df.assign(ValueHat=self.fhat)
        self.df = self.df.assign(Residual=self.df.Value - self.df.ValueHat)
시도 1
# Attempt 1: wrap the stacked four-country frame.
rst = Stock_graph(df)
len(rst.df)
2955
# Number of nodes (rows) in the stacked frame.
T = len(rst.df)
# NOTE: this call raises "ValueError: x and y must be the same size" (see the
# recorded output): the Korea slice has 740 rows and the Japan slice 731.
plt.scatter(rst.df[rst.df['Country']=='Korea'].iloc[:,[8,9]],rst.df[rst.df['Country']=='Japan'].iloc[:,[8,9]])
ValueError: x and y must be the same size
# Show the Value and Year columns (positions 8 and 9) of the Japan rows.
rst.df[rst.df['Country']=='Japan'].iloc[:,[8,9]]
Value | Year | |
---|---|---|
367 | -0.604658 | 2020.0 |
368 | -0.496240 | 2020.0 |
369 | -0.604687 | 2020.0 |
370 | -0.448252 | 2020.0 |
371 | -0.415890 | 2020.0 |
... | ... | ... |
1093 | 0.331131 | 2022.0 |
1094 | 0.343410 | 2022.0 |
1095 | 0.312021 | 2022.0 |
1096 | 0.239862 | 2022.0 |
1097 | 0.240105 | 2022.0 |
731 rows × 2 columns
# Path-graph adjacency over the T stacked rows: W[i, j] is 1 when |i - j| == 1
# and 0 otherwise. Built from the two off-diagonals instead of a T*T Python
# loop.
# NOTE(review): adjacency follows the row order of the concatenated frame, so
# the last row of one country is linked to the first row of the next -
# confirm this is intended.
W = np.eye(T, k=1) + np.eye(T, k=-1)
W
rst.W = W
rst.fit()
Result
# Join the fitted stock rows with the COVID table on the date and show the 30
# rows with the largest residuals.
# NOTE(review): the join key is 'Date' only, so each stock row is paired with
# every country's COVID row for that date; merging on ['Date', 'Country'] may
# be what is wanted - confirm.
rst.df.merge(covid_final_add,on='Date').sort_values("Residual",ascending=False).iloc[:30,:]
시도 2
# Attempt 2: start again from the stacked frame.
rst2 = Stock_graph(df)
len(rst2.df)
T
rst2.df
# Pivot the signal into one column per country and show the second one.
# The columns are selected by name: positions 9 and 10 are Year and Mon, which
# would make the pivot on 'Country' / 'Value' fail with a KeyError.
rst2.df.sort_values('Country')[['Country', 'Value']].reset_index(drop=True).pivot(columns='Country', values='Value').iloc[:,1].dropna()
Result
# Display the combined COVID table with its Year / Mon / Day columns.
covid_final_add