import pandas as pd
import numpy as np
import csv
from datetime import datetime
from datetime import date
import calendar
import requests
import pyodbc
import io
import warnings
warnings.filterwarnings('ignore')
def get_login():
    """Read Azure SQL credentials from a key=value text file."""
    server = ''
    db = ''
    uid = ''
    passwd = ''
    with open("C:\\Users\\chris\\Documents\\GitHub\\Jupyter-Notebooks\\azure.txt") as f:
        lines = f.read().split("\n")
    for line in lines:
        # partition() keeps everything after the first '=', so values may contain '='.
        key, _, value = line.partition("=")
        if key == "server":
            server = value
        elif key == "db":
            db = value
        elif key == "uid":
            uid = value
        elif key == "passwd":
            passwd = value
    return (server, db, uid, passwd)
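# The credentials file is expected to contain one key=value pair per line; a
# minimal sketch (placeholder values, not real credentials):
#
#   server=yourserver.database.windows.net
#   db=yourdatabase
#   uid=youruser
#   passwd=yourpassword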
def get_sheet(spreadsheetID, sheetID, create_date, save):
    """Download one tab of a Google Sheet as CSV and return it as a DataFrame."""
    url = f"https://docs.google.com/spreadsheets/d/{spreadsheetID}/gviz/tq?tqx=out:csv&gid={sheetID}"
    res = requests.get(url)
    res.raise_for_status()  # fail fast on a bad response
    if save:
        with open(f"vintage-metagame-rawdata-{create_date}.csv", "wb") as f:
            f.write(res.content)
    # Parse the bytes already downloaded instead of fetching the URL a second time.
    return pd.read_csv(io.BytesIO(res.content))
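# Example call (IDs below are placeholders; the real call appears further down):
#   df = get_sheet("SPREADSHEET_ID", "SHEET_GID", "2023-01-01", save=True)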
def clean_merged_data(create_date, save, raw):
    """Split the raw sheet into a Results table and an Events table."""
    # Keep the first 11 columns and give them consistent names. copy() avoids
    # mutating a view of the raw frame.
    vintage = raw[raw.columns[:11]].copy()
    vintage.columns = ['Rank','Player','Wins','Losses','Byes','Arch','Subarch','Deck','Details','Date','Event_Type']
    # Replace NA values in 'Byes' with 0 and NA deck lists with 'NULL'.
    vintage.Byes = vintage.Byes.fillna(0)
    vintage.Byes = vintage.Byes.astype("int")
    vintage.Deck = vintage.Deck.fillna('NULL')
    # Normalize the event-type label so it is a single token.
    vintage["Event_Type"] = vintage["Event_Type"].replace({"Showcase Qualifier": "Showcase_Qualifier"})
    # The sheet only lists the event type on the first row of each event;
    # forward-fill propagates it to every record.
    vintage["Event_Type"] = vintage["Event_Type"].ffill()
    # Derive a unique Event_ID per event: YYYY-MM-DD-<type>-<n>, where n
    # numbers same-day events of the same type.
    event_cnt = 1
    date_last = ''
    etype_last = ''
    event_id = []
    dates_new = []
    for index, row in vintage.iterrows():
        # Dates in the sheet are M/D/YY; normalize to YYYY-MM-DD.
        month, day, year = row['Date'].split("/")
        date_str = f'20{year}-{month.zfill(2)}-{day.zfill(2)}'
        if row['Rank'] == 1:
            # Rank 1 marks the first row of a new event.
            if date_str == date_last and row['Event_Type'] == etype_last:
                event_cnt += 1
            else:
                event_cnt = 1
        etype = row['Event_Type']
        event_id.append(f'{date_str}-{etype}-{event_cnt}')
        dates_new.append(date_str)
        date_last = date_str
        etype_last = row['Event_Type']
    vintage["Event_ID"] = event_id
    # Build a second table, Events, with one row per unique event.
    events = pd.DataFrame({"Event_ID": event_id,
                           "Event_Type": vintage["Event_Type"].tolist(),
                           "Date": dates_new})
    events = events.groupby("Event_ID", as_index=False)[["Event_Type", "Date"]].last()
    # Add an 'Entries' column: the number of players equals the worst (max) rank.
    players = vintage.groupby("Event_ID", as_index=False)["Rank"].max()
    events = events.merge(players, on="Event_ID")
    events.rename(columns={"Rank": "Entries", "Date": "Event_Date"}, inplace=True)
    # Add the day of the week for each event date.
    events["Day_Of_Week"] = events["Event_Date"].apply(lambda x: calendar.day_name[datetime.strptime(x, "%Y-%m-%d").weekday()])
    # Drop 'Details', plus 'Date' and 'Event_Type', which now live in Events.
    vintage.drop(["Details", "Date", "Event_Type"], axis=1, inplace=True)
    # Rename 'Rank' to 'Finish'.
    vintage.rename(columns={"Rank": "Finish"}, inplace=True)
    # Strip commas, which break the CSV/SQL import. Single quotes are left in
    # place: the INSERT path is parameterized and the UPDATE path escapes them.
    vintage["Arch"] = vintage["Arch"].apply(lambda x: str(x).replace(",", ""))
    vintage["Subarch"] = vintage["Subarch"].apply(lambda x: str(x).replace(",", ""))
    vintage["Deck"] = vintage["Deck"].apply(lambda x: str(x).replace(",", ""))
    # Optionally export both tables to CSV.
    if save:
        vintage.to_csv(f"vintage-results-{create_date}.csv", index=False)
        events.to_csv(f"vintage-events-{create_date}.csv", index=False)
    return (vintage, events)
# Download the current sheet and split it into Results and Events tables.
create_date = date.today().strftime("%Y-%m-%d")
df = get_sheet("1wxR3iYna86qrdViwHjUPzHuw6bCNeMLb72M25hpUHYk", "1693401931", create_date, save=False)
results, events = clean_merged_data(create_date=create_date, save=False, raw=df)
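# Lightweight sanity checks (a minimal sketch; these should hold by construction):
# every Event_ID in Events is unique, every result references a known event, and
# IDs look like '2023-01-07-Challenge-1'.
assert events.Event_ID.is_unique
assert results.Event_ID.isin(events.Event_ID).all()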
def upload_data(cursor, table):
    """Upsert each row: try an INSERT first; on failure, fall back to an UPDATE."""
    global added
    global updated
    def get_indexes(pkey_index, row_length):
        # Split column positions into primary-key columns and value columns.
        return (pkey_index, [x for x in range(row_length) if x not in pkey_index])
    for name in table:
        added = 0
        updated = 0
        if name == 'Results':
            # Primary key: (Event_ID, Finish) at positions 8 and 0.
            pkey_index, value_index = get_indexes(pkey_index=[8, 0], row_length=len(results.columns.to_list()))
            columns = results.columns.to_list()
            table_name = 'vintage_results'
            rows = results
        elif name == 'Events':
            # Primary key: Event_ID at position 0.
            pkey_index, value_index = get_indexes(pkey_index=[0], row_length=len(events.columns.to_list()))
            columns = events.columns.to_list()
            table_name = 'vintage_events'
            rows = events
        for _, row in rows.iterrows():
            values = row.to_list()
            # Parameterized INSERT: INSERT INTO <table> (c1, ..., cn) VALUES (?, ..., ?)
            insert_str = (f'INSERT INTO {table_name} ('
                          + ', '.join(columns)
                          + ') VALUES ('
                          + ', '.join(['?'] * len(values))
                          + ')')
            # Fallback UPDATE with literal values; single quotes are escaped by
            # doubling them, SQL Server style.
            set_parts = []
            for i in value_index:
                if values[i] == 'NULL':
                    set_parts.append(f'{columns[i]} = NULL')
                elif isinstance(values[i], str):
                    escaped = values[i].replace("'", "''")
                    set_parts.append(f"{columns[i]} = '{escaped}'")
                else:
                    set_parts.append(f'{columns[i]} = {values[i]}')
            where_parts = []
            for i in pkey_index:
                if isinstance(values[i], str):
                    escaped = values[i].replace("'", "''")
                    where_parts.append(f"{columns[i]} = '{escaped}'")
                else:
                    where_parts.append(f'{columns[i]} = {values[i]}')
            update_str = (f'UPDATE {table_name} SET '
                          + ', '.join(set_parts)
                          + ' WHERE '
                          + ' AND '.join(where_parts))
            try:
                cursor.execute(insert_str, tuple(values))
                added += 1
            except pyodbc.Error:
                # Row already exists (primary-key violation): update it instead.
                cursor.execute(update_str)
                updated += 1
        print(added, updated)
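# For illustration, the generated statements take roughly this shape (the row
# values below are hypothetical):
#
#   INSERT INTO vintage_events (Event_ID, Event_Type, Event_Date, Entries, Day_Of_Week)
#       VALUES (?, ?, ?, ?, ?)
#
#   UPDATE vintage_events SET Event_Type = 'Challenge', Event_Date = '2023-01-07',
#       Entries = 32, Day_Of_Week = 'Saturday'
#       WHERE Event_ID = '2023-01-07-Challenge-1'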
def to_datetime(x):
    return datetime.strptime(x, "%Y-%m-%d")
#start_date = '2023-01-01'
start_date = input('Enter import start date (YYYY-MM-DD): ')
try:
    start_date = datetime.strptime(start_date, "%Y-%m-%d")
except ValueError:
    # Fall back to the earliest supported date if the input doesn't parse.
    start_date = datetime.strptime('2020-01-01', "%Y-%m-%d")
# Keep only events on or after the start date, and only their results.
events = events[events.Event_Date.apply(to_datetime) >= start_date]
results = results[results.Event_ID.isin(events.Event_ID.unique().tolist())]
# Connect to the database
server, db, uid, passwd = get_login()
conn_str = 'DRIVER={SQL Server};SERVER=tcp:' + f'{server};PORT=1433;DATABASE={db};UID={uid};PWD={passwd}'
conn = pyodbc.connect(conn_str)
cursor = conn.cursor()
# Iterate over each row and upload it to the database (~26 mins). Events are
# inserted first because Results rows reference them through a foreign key.
upload_data(cursor=cursor, table=['Events', 'Results'])
conn.commit()
conn.close()
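
# Optional post-load check (a minimal sketch): re-open a connection and confirm
# the target tables now contain rows.
conn = pyodbc.connect(conn_str)
check = conn.cursor()
for t in ('vintage_events', 'vintage_results'):
    check.execute(f"SELECT COUNT(*) FROM {t}")
    print(t, 'rows:', check.fetchone()[0])
conn.close()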