import urllib.request


from urllib.request import urlopen


# Common prefix of every CSV published for the Toyo Keizai COVID-19 dashboard.
_CSV_BASE_URL = "https://toyokeizai.net/sp/visual/tko/covid19/csv/"

# Full download URLs, in a fixed order.  The last entry (prefectures.csv) is
# addressed elsewhere in this file as ``dataurls[-1]``, so keep it last.
dataurls = tuple(
    _CSV_BASE_URL + csv_name
    for csv_name in (
        "pcr_positive_daily.csv",
        "pcr_tested_daily.csv",
        "cases_total.csv",
        "recovery_total.csv",
        "death_total.csv",
        "pcr_case_daily.csv",
        "severe_daily.csv",
        "effective_reproduction_number.csv",
        "demography.csv",
        "prefectures.csv",
    )
)


def load(dataurl):
    """Download *dataurl* and save it under its own file name.

    The local file name is the text after the last ``/`` of the URL and is
    created relative to the current working directory, overwriting any
    existing file of that name.  The whole response body is read into memory
    before being written out.
    """
    _, _, local_name = dataurl.rpartition("/")
    with urlopen(dataurl) as response:
        with open(local_name, "wb") as target:
            print ("downloading :", local_name)
            target.write(response.read())


import os, certifi
# Point SSL_CERT_FILE at the CA bundle shipped with certifi before any
# download starts (presumably so HTTPS certificate verification in urlopen
# succeeds on this machine -- confirm if the environment changes).
os.environ["SSL_CERT_FILE"]=certifi.where()

# Fetch every CSV listed in dataurls; load() saves each one under its own
# file name in the current working directory.
for dataurl in dataurls:
    load(dataurl)

downloading : pcr_positive_daily.csv
downloading : pcr_tested_daily.csv
downloading : cases_total.csv
downloading : recovery_total.csv
downloading : death_total.csv
downloading : pcr_case_daily.csv
downloading : severe_daily.csv
downloading : effective_reproduction_number.csv
downloading : demography.csv
downloading : prefectures.csv


import pandas
import sqlite3
from os.path import splitext
import os


def from_csv_to_df(dataurl):
    """Read the already-downloaded CSV that corresponds to *dataurl*.

    Only the part of *dataurl* after the last ``/`` is used; it is treated as
    a file name relative to the current working directory and parsed with
    ``pandas.read_csv`` using its default settings.  Returns the DataFrame.
    """
    local_name = dataurl.rsplit("/", 1)[-1]
    return pandas.read_csv(local_name)

def from_csv_to_sql(dataurl):
    """Load the downloaded CSV for *dataurl* into a table of ``covid19.db``.

    The table is named after the CSV file without its extension.  Once the
    table has been written, the local CSV file is deleted.

    Args:
        dataurl: URL (or bare file name) whose last path component names a
            CSV file present in the current working directory.

    Raises:
        Whatever ``pandas.read_csv`` / ``DataFrame.to_sql`` raise, e.g. when
        the CSV is missing or the table already exists.  In that case the CSV
        file is left in place.
    """
    fn = dataurl.split("/")[-1]
    table = splitext(fn)[0]
    df = from_csv_to_df(dataurl)
    con = sqlite3.connect('covid19.db')
    try:
        print ("convert ", table)
        # ``with con`` only commits or rolls back the transaction; it does
        # not close the connection, hence the explicit close() below.
        with con:
            df.to_sql(table, con)
    finally:
        con.close()
    os.remove(fn)


import os, certifi
# Point SSL_CERT_FILE at the CA bundle shipped with certifi.
os.environ["SSL_CERT_FILE"]=certifi.where()

# Start from an empty database: delete a covid19.db left over from an earlier
# run, and ignore the case where there is none.
try:
    os.remove('covid19.db')
except FileNotFoundError:
    pass
# Convert each downloaded CSV into one table of covid19.db.
for dataurl in dataurls:
    from_csv_to_sql(dataurl)

convert  pcr_positive_daily
convert  pcr_tested_daily

/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/pandas/core/generic.py:2779: UserWarning: The spaces in these column names will not be changed. In pandas versions < 0.14, spaces were converted to underscores.
  sql.to_sql(

convert  cases_total
convert  recovery_total
convert  death_total
convert  pcr_case_daily
convert  severe_daily
convert  effective_reproduction_number
convert  demography
convert  prefectures


def _quote_identifier(name):
    """Return *name* as a double-quoted SQL identifier, doubling embedded quotes."""
    return '"{}"'.format(name.replace('"', '""'))


def dump_table_info(dbf):
    """Print schema, column details and row counts for every table in *dbf*.

    For each table listed in ``sqlite_master`` this prints the table name,
    its ``CREATE TABLE`` statement and the rows returned by
    ``PRAGMA table_info``.  A second section prints the row count per table.

    Args:
        dbf: Path of the SQLite database file to inspect.

    Note:
        The printed column header is kept exactly as it was (including its
        spelling) so that output stays comparable with earlier runs.  The
        values under it are the raw ``PRAGMA table_info`` columns, which
        SQLite documents as cid, name, type, notnull, dflt_value and pk.
    """
    db = sqlite3.connect(dbf)
    try:
        cur = db.cursor()
        table_info = cur.execute(
            "select name,sql from sqlite_master where type = 'table'"
        ).fetchall()
        for tbl, sql in table_info:
            print ("******* table: {:s} *******".format(tbl))
            print ("sql:", sql)
            print ("------ field info -----")
            # Quote the name: table names cannot be bound as parameters, and
            # an unquoted name containing spaces or keywords is a syntax error.
            cur.execute("PRAGMA table_info({:s});".format(_quote_identifier(tbl)))
            print ("rowid","name","type","nullable","defalut", "pk(primary key)")
            for e in cur.fetchall():
                print (*e)

        print("****** data count in tables ******")
        print( "table\tcout")
        for tbl, _sql in table_info:
            cur.execute("select count(*) from {:s};".format(_quote_identifier(tbl)))
            print (tbl,"\t", cur.fetchone()[0])
    finally:
        # The connection was previously never closed.
        db.close()

# Print the schema and row counts of the database built from the CSV files.
dump_table_info("covid19.db")

******* table: pcr_positive_daily *******
sql: CREATE TABLE "pcr_positive_daily" (
"index" INTEGER,
  "日付" TEXT,
  "PCR 検査陽性者数(単日)" INTEGER,
  "Unnamed: 2" REAL,
  "Unnamed: 3" TEXT
)
------ field info -----
rowid name type nullable defalut pk(primary key)
0 index INTEGER 0 None 0
1 日付 TEXT 0 None 0
2 PCR 検査陽性者数(単日) INTEGER 0 None 0
3 Unnamed: 2 REAL 0 None 0
4 Unnamed: 3 TEXT 0 None 0
******* table: pcr_tested_daily *******
sql: CREATE TABLE "pcr_tested_daily" (
"index" INTEGER,
  "日付" TEXT,
  "PCR 検査実施件数(単日)" INTEGER
)
------ field info -----
rowid name type nullable defalut pk(primary key)
0 index INTEGER 0 None 0
1 日付 TEXT 0 None 0
2 PCR 検査実施件数(単日) INTEGER 0 None 0
******* table: cases_total *******
sql: CREATE TABLE "cases_total" (
"index" INTEGER,
  "日付" TEXT,
  "入院治療を要する者" INTEGER
)
------ field info -----
rowid name type nullable defalut pk(primary key)
0 index INTEGER 0 None 0
1 日付 TEXT 0 None 0
2 入院治療を要する者 INTEGER 0 None 0
******* table: recovery_total *******
sql: CREATE TABLE "recovery_total" (
"index" INTEGER,
  "日付" TEXT,
  "退院、療養解除となった者" INTEGER
)
------ field info -----
rowid name type nullable defalut pk(primary key)
0 index INTEGER 0 None 0
1 日付 TEXT 0 None 0
2 退院、療養解除となった者 INTEGER 0 None 0
******* table: death_total *******
sql: CREATE TABLE "death_total" (
"index" INTEGER,
  "日付" TEXT,
  "死亡者数" INTEGER
)
------ field info -----
rowid name type nullable defalut pk(primary key)
0 index INTEGER 0 None 0
1 日付 TEXT 0 None 0
2 死亡者数 INTEGER 0 None 0
******* table: pcr_case_daily *******
sql: CREATE TABLE "pcr_case_daily" (
"index" INTEGER,
  "日付" TEXT,
  "国立感染症研究所" INTEGER,
  "検疫所" INTEGER,
  "地方衛生研究所・保健所" INTEGER,
  "民間検査会社" INTEGER,
  "大学等" INTEGER,
  "医療機関" REAL
)
------ field info -----
rowid name type nullable defalut pk(primary key)
0 index INTEGER 0 None 0
1 日付 TEXT 0 None 0
2 国立感染症研究所 INTEGER 0 None 0
3 検疫所 INTEGER 0 None 0
4 地方衛生研究所・保健所 INTEGER 0 None 0
5 民間検査会社 INTEGER 0 None 0
6 大学等 INTEGER 0 None 0
7 医療機関 REAL 0 None 0
******* table: severe_daily *******
sql: CREATE TABLE "severe_daily" (
"index" INTEGER,
  "日付" TEXT,
  "重症者数" INTEGER
)
------ field info -----
rowid name type nullable defalut pk(primary key)
0 index INTEGER 0 None 0
1 日付 TEXT 0 None 0
2 重症者数 INTEGER 0 None 0
******* table: effective_reproduction_number *******
sql: CREATE TABLE "effective_reproduction_number" (
"index" INTEGER,
  "日付" TEXT,
  "実効再生産数" REAL
)
------ field info -----
rowid name type nullable defalut pk(primary key)
0 index INTEGER 0 None 0
1 日付 TEXT 0 None 0
2 実効再生産数 REAL 0 None 0
******* table: demography *******
sql: CREATE TABLE "demography" (
"index" INTEGER,
  "year" INTEGER,
  "month" INTEGER,
  "date" INTEGER,
  "age_group" TEXT,
  "tested_positive" INTEGER,
  "hospitalized" INTEGER,
  "serious" INTEGER,
  "death" INTEGER
)
------ field info -----
rowid name type nullable defalut pk(primary key)
0 index INTEGER 0 None 0
1 year INTEGER 0 None 0
2 month INTEGER 0 None 0
3 date INTEGER 0 None 0
4 age_group TEXT 0 None 0
5 tested_positive INTEGER 0 None 0
6 hospitalized INTEGER 0 None 0
7 serious INTEGER 0 None 0
8 death INTEGER 0 None 0
******* table: prefectures *******
sql: CREATE TABLE "prefectures" (
"index" INTEGER,
  "year" INTEGER,
  "month" INTEGER,
  "date" INTEGER,
  "prefectureNameJ" TEXT,
  "prefectureNameE" TEXT,
  "testedPositive" INTEGER,
  "peopleTested" REAL,
  "hospitalized" REAL,
  "serious" TEXT,
  "discharged" REAL,
  "deaths" TEXT,
  "effectiveReproductionNumber" REAL
)
------ field info -----
rowid name type nullable defalut pk(primary key)
0 index INTEGER 0 None 0
1 year INTEGER 0 None 0
2 month INTEGER 0 None 0
3 date INTEGER 0 None 0
4 prefectureNameJ TEXT 0 None 0
5 prefectureNameE TEXT 0 None 0
6 testedPositive INTEGER 0 None 0
7 peopleTested REAL 0 None 0
8 hospitalized REAL 0 None 0
9 serious TEXT 0 None 0
10 discharged REAL 0 None 0
11 deaths TEXT 0 None 0
12 effectiveReproductionNumber REAL 0 None 0
****** data count in tables ******
table	cout
pcr_positive_daily 	 467
pcr_tested_daily 	 444
cases_total 	 448
recovery_total 	 454
death_total 	 438
pcr_case_daily 	 433
severe_daily 	 447
effective_reproduction_number 	 422
demography 	 10
prefectures 	 19454


#%matplotlib inline #なくても良いようです。
import matplotlib.pyplot as pyplot
import datetime


def plot_positive(pref):
    """Plot ``testedPositive`` over time for one prefecture from ``covid19.db``.

    Rows of the ``prefectures`` table whose ``prefectureNameJ`` equals *pref*
    and whose (year, month) is October 2020 or later are read in date order
    and drawn as a line on the current pyplot figure.

    Args:
        pref: Prefecture name as stored in ``prefectureNameJ``.
    """
    db = sqlite3.connect("covid19.db")
    try:
        # Bind the prefecture name as a parameter instead of formatting it
        # into the SQL text: this avoids quoting problems and does not rely
        # on SQLite accepting a double-quoted token as a string literal.
        data = db.execute(
            """
            select year, month, date, testedPositive, peopleTested
            from  prefectures
            where  prefectureNameJ = ?
            and    (year,month) >= (2020,10)
            order by year,month,date;
            """,
            (pref,),
        ).fetchall()
    finally:
        # A ``with`` block on a sqlite3 connection does not close it.
        db.close()

    xdata = [datetime.date(y, m, d) for y, m, d, *_ in data]
    positives = [row[3] for row in data]  # testedPositive column
    pyplot.xlabel("date")
    pyplot.ylabel("Positive")
    pyplot.plot(xdata, positives)
    # Draw first, then read the ticks back -- presumably so the tick labels
    # are populated before being thinned out below.
    pyplot.draw()
    locs, labels = pyplot.xticks()
    # Keep every second tick to reduce crowding on the date axis.
    pyplot.xticks(locs[::2], labels[::2], font="Hiragino Mincho ProN")


# Plot the positive counts for Ibaraki Prefecture.
plot_positive("茨城県")


# Clear the current figure, then plot Tokyo.
pyplot.clf()
plot_positive("東京都")
#pyplot.show()


import io
from io import StringIO, BytesIO

def load_csv_to_df(dataurl):
    """Fetch the CSV at *dataurl* and return it as a pandas DataFrame.

    Nothing is written to disk: the response body is decoded as UTF-8 and
    handed to ``pandas.read_csv`` through an in-memory text buffer.  The file
    name (text after the last ``/``) is used only for the progress message.
    """
    _, _, csv_name = dataurl.rpartition("/")
    with urlopen(dataurl) as response:
        print ("downloading :", csv_name)
        # The body arrives as bytes; decode it to text before parsing.
        text = response.read().decode('utf-8')
    return pandas.read_csv(io.StringIO(text))

def plot_positive_df(pref):
    """Plot ``testedPositive`` over time for one prefecture using pandas only.

    The prefecture CSV (last entry of ``dataurls``) is read straight from the
    web, filtered to rows whose ``prefectureNameJ`` equals *pref* and whose
    (year, month) is October 2020 or later, and drawn as a line plot.

    Args:
        pref: Prefecture name as it appears in the ``prefectureNameJ`` column.
    """
    # Close the HTTP response once the CSV has been parsed.
    with urlopen(dataurls[-1]) as inf:
        df = pandas.read_csv(inf)

    # Vectorized filter.  October 2020 itself is included (>=), matching the
    # SQL-based plot_positive; the earlier tuple comparison with ``>``
    # silently dropped that month.
    from_oct_2020 = (df.year > 2020) | ((df.year == 2020) & (df.month >= 10))
    # Copy so that adding a column below does not write into a slice of the
    # full frame.
    df = df.loc[(df.prefectureNameJ == pref) & from_oct_2020, :].copy()

    # Add a "datetime" column built from the year / month / date columns.
    df['datetime'] = [
        datetime.date(y, m, d) for y, m, d in zip(df.year, df.month, df.date)
    ]
    df.plot.line(x="datetime", y="testedPositive")


# DataFrame-based plot for Ibaraki Prefecture.
plot_positive_df("茨城県")


# DataFrame-based plot for Tokyo.
plot_positive_df("東京都")

From Web to Plot¶

web上のデータをダウンロード¶

CSVから SQL Databaseへの変換¶

sql databaseの確認¶

データのプロット¶

Dataframe を使って、同様のことができるか確認してみる。¶