Substitution-Requests/helperFunctions.py
2023-10-11 14:39:58 -04:00

489 lines
20 KiB
Python

#commonly-used helper functions for the other scripts
#By Drew Murray murraydr@msu.edu
import pandas as pd
import copy as cp
import smtplib
import os.path
from datetime import datetime, timedelta
#libraries for google forms
from apiclient import discovery
from httplib2 import Http
from oauth2client import client, file, tools
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from google.auth.transport.requests import Request
sectionDatabaseFilename="sectionsDatabase.csv"
staffDatabaseFilename="staffDatabase.csv"
#Converts full name to netID based on the staff database
#Uses a mildly 'fuzzy' search where the searched-for string must only occur *somewhere* in the actual full name
def nameToID(searchName, getAllMatches=False):
    """Look up the NetID(s) for a (partial) staff name.

    searchName: substring matched against the 'Name' column, case-insensitive
        and literal (no regex); a '*' returning-staff marker is stripped first.
    getAllMatches: when True, return the array of every matching NetID;
        when False, return only the first match.
    Returns "" for an empty search string, and -1 when no name matches
    (or an empty array when getAllMatches is True).
    """
    if searchName == "":
        return ""
    stfD = pd.read_csv(staffDatabaseFilename, dtype=str, index_col=False)
    matches = stfD[stfD['Name'].str.contains(searchName.strip("*"), case=False, regex=False)]["NetID"].values
    # BUGFIX: the original wrapped str.contains in try/except IndexError, but
    # that call never raises IndexError -- the crash was matches[0] below on an
    # empty result.  Check for the empty case explicitly instead.
    if len(matches) == 0:
        print('ERROR: name "' + searchName + '" does not appear in the staff database')
        return matches if getAllMatches else -1
    if getAllMatches:
        return matches
    return matches[0]
#Converts netID to full name for GUI human-readability
def IDToName(searchID):
    """Reverse lookup: NetID -> display name.

    Returning staff (column 3 flag == "1") get a '*' appended to their name.
    An empty searchID yields "".
    """
    if searchID == "":
        return ""
    stfD = pd.read_csv(staffDatabaseFilename, dtype=str, index_col=False)
    # Grab the matching row once instead of filtering twice.
    row = stfD.loc[stfD['NetID'] == searchID].values[0]
    name = row[0]
    if row[3] == "1":  # returning-staff flag
        name += "*"    # asterisk marks veterans in the GUI
    return name
#Tests if two requests overlap i.e. have at least one day/time combo in common
def isRequestOverlap(req1, req2):
    """Return True when two requests share at least one (section, date) pair.

    req[2] holds ';'-separated section codes and req[3] ';'-separated dates.
    The requests overlap exactly when their section sets intersect AND their
    date sets intersect.
    """
    # Set intersection replaces the original O(n^4) quadruple loop, and the
    # leftover debug print(req1)/print(req2) calls have been removed.
    sections1 = set(req1[2].split(';'))
    sections2 = set(req2[2].split(';'))
    dates1 = set(req1[3].split(';'))
    dates2 = set(req2[3].split(';'))
    return bool(sections1 & sections2) and bool(dates1 & dates2)
#Get the time that a section takes place during
def getTimeFromSection(section):
    """Return the meeting time recorded for the given section code."""
    secD = pd.read_csv(sectionDatabaseFilename, dtype=str, index_col=False)
    matching = secD.loc[secD['Section'] == section]
    return matching['Time'].iloc[0]
#Get the location that a section takes place in
def getLocationFromSection(section):
    """Return the room/location recorded for the given section code."""
    secD = pd.read_csv(sectionDatabaseFilename, dtype=str, index_col=False)
    matching = secD.loc[secD['Section'] == section]
    return matching['Location'].iloc[0]
#Get all dates that a section takes place in
def getDatesFromSection(sec):
    """Return every date on which the given section meets, as a list of strings."""
    secD = pd.read_csv(sectionDatabaseFilename, dtype=str, index_col=False)
    return [d for d in secD.loc[secD["Section"] == sec]["Date"]]
#Tests if a netID is currently assigned to a given section on a given date
def isAssigned(netID, period, date):
    """Return True if netID appears anywhere in the row(s) for (period, date).

    NOTE: this is a substring test across every column of the matching row,
    so an ID that is a prefix of another could in principle false-positive.
    """
    secD = pd.read_csv(sectionDatabaseFilename, dtype=str, index_col=False)
    rows = secD.loc[(secD['Section'] == period) & (secD['Date'] == date)]
    # regex=False: treat the netID as a literal string.  The original used the
    # regex default, which would misbehave for IDs containing regex
    # metacharacters (and is slower besides).
    filt = rows.apply(lambda r: r.astype('string').str.contains(netID, regex=False).any(), axis=1)
    return bool(filt.any())
#Overwrite a netID in one row of the section database with another
def reassign(date, period, oldID, newID):
    """Replace oldID with newID in the section-database row for (period, date),
    then write the database back to disk."""
    secD = pd.read_csv(sectionDatabaseFilename, dtype=str, index_col=False)
    mask = (secD['Section'] == period) & (secD['Date'] == date)
    secD.loc[mask] = secD.loc[mask].replace(oldID, newID)
    secD.to_csv(sectionDatabaseFilename, index=False, index_label=False)
#Get the strings of all section titles currently in the database (dropping duplicates)
def getAllSectionTitles():
    """Return the distinct section titles, preserving first-seen order."""
    secD = pd.read_csv(sectionDatabaseFilename, dtype=str, index_col=False)
    return list(secD["Section"].drop_duplicates().values)
#Get the strings of all names currently in the staff database
def getAllNames():
    """Return every staff member's full name as a list of strings."""
    stfD = pd.read_csv(staffDatabaseFilename, dtype=str, index_col=False)
    names = stfD["Name"].values
    return list(names)
#Add or subtract from the count of a given person's sub request history in one category (e.g. rejections)
def incrementSubCount(netID, category, amount=1):
    """Adjust one substitution-history counter for a staff member and save.

    category: either a numeric code or its string mnemonic --
        0/"APP" Approved, 1/"ACC" Accepted, 2/"REJ" Rejected,
        3/"CAN" Cancelled, 4/"FUL" Fulfilled.
    amount: delta to apply (may be negative).  Prints an error and returns
    without touching the file when the category is unrecognized.
    """
    columnsByCategory = {
        0: "Approved Substitutions", "APP": "Approved Substitutions",
        1: "Accepted Substitutions", "ACC": "Accepted Substitutions",
        2: "Rejected Substitutions", "REJ": "Rejected Substitutions",
        3: "Cancelled Substitutions", "CAN": "Cancelled Substitutions",
        4: "Fulfilled Substitutions", "FUL": "Fulfilled Substitutions",
    }
    if category not in columnsByCategory:
        print("ERROR: Invalid category to incrementSubCount")
        return
    columnName = columnsByCategory[category]
    stfD = pd.read_csv(staffDatabaseFilename, dtype=str, index_col=False)
    mask = stfD['NetID'] == netID
    # BUGFIX: the original called float() on a whole Series, which is
    # deprecated and only ever worked for exactly one matching row; take the
    # single cell explicitly instead.
    current = float(stfD.loc[mask, columnName].iloc[0])
    # Counts are stored as strings of floats, matching the original format.
    stfD.loc[mask, columnName] = str(current + amount)
    stfD.to_csv(staffDatabaseFilename, index=False, index_label=False)
#Get an array of the counts of each category of sub history (Approved, Accepted, Rejected, Cancelled, Fulfilled) for a given person
def getSubCount(netID):
    """Return the 5-tuple of substitution-history counts (as stored strings)
    for netID, in the order: Approved, Accepted, Rejected, Cancelled, Fulfilled."""
    stfD = pd.read_csv(staffDatabaseFilename, dtype=str, index_col=False)
    # Filter the row once rather than once per category.
    row = stfD.loc[stfD["NetID"] == netID]
    categories = ("Approved Substitutions", "Accepted Substitutions",
                  "Rejected Substitutions", "Cancelled Substitutions",
                  "Fulfilled Substitutions")
    return tuple(row[column].values[0] for column in categories)
#Get a "high score" list or a "low score" list of names/scores based on a numeric string showing which categories to count
#0: Approved, 1: Accepted, 2: Rejected, 3: Cancelled, 4: Fulfilled
#E.g. "12" gives the ULAs with the most total accepted and rejected requests thusfar.
def getTopSubs(categoryStr, fewest=False, num=1):
    """Return a printable table of the `num` staff with the highest (or, with
    fewest=True, lowest) total across the selected history categories.

    categoryStr: string of digits choosing which categories to sum --
        "0" Approved, "1" Accepted, "2" Rejected, "3" Cancelled, "4" Fulfilled.
    Returns "" when categoryStr is empty.
    """
    if categoryStr == "":
        return ""
    stfD = pd.read_csv(staffDatabaseFilename, dtype=str, index_col=False)
    categoryColumns = (("0", "Approved Substitutions"),
                       ("1", "Accepted Substitutions"),
                       ("2", "Rejected Substitutions"),
                       ("3", "Cancelled Substitutions"),
                       ("4", "Fulfilled Substitutions"))
    columns = [column for digit, column in categoryColumns if digit in categoryStr]
    # Combined-score column header like "APP+ACC", from each name's first 3 letters.
    header = "+".join(column[:3].upper() for column in columns)
    stfD[header] = 0
    for column in columns:
        stfD[header] += stfD[column].astype(float)
    ranked = stfD.sort_values(header, axis=0, ascending=fewest)
    return ranked.head(num)[["Name", header]].to_string(index=False)
#Test if a given single shift exists, i.e. if that section meets on that date.
def shiftExists(period, date):
    """Return True when the section database has a row for (period, date)."""
    secD = pd.read_csv(sectionDatabaseFilename, dtype=str, index_col=False)
    matching = secD[(secD['Section'] == period) & (secD['Date'] == date)]
    # An empty filter result means that section does not meet on that date.
    return not matching.empty
#Get a list of all names currently assigned to a given section on a given date
def getAllNamesFromSection(periods,dates):
    """Collect the display names assigned to every (period, date) combination.

    periods, dates: ';'-separated strings; every period is paired with every
        date, in nested order (all dates of the first period first).
    Returns a list with one entry per combination: a list of names, or the
    literal string "NoShift" when no matching row exists.
    """
    secD = pd.read_csv(sectionDatabaseFilename,dtype=str,index_col=False)
    others=[]
    for period in periods.split(';'):
        for date in dates.split(';'):
            try:
                temp=secD[(secD['Section']==period) & (secD['Date']==date)]
                #Columns 5..15 of a section row hold the assigned netIDs.
                #NOTE(review): hard-coded slice -- confirm against the CSV layout.
                netIDs=list(temp.values[0][5:16])
                netIDs = [IDToName(x) for x in netIDs if pd.notna(x)]
                others.append(cp.deepcopy(netIDs))
            except IndexError:
                #values[0] raised because no row matched (or an IDToName lookup
                #failed); record the sentinel string instead of a name list.
                others.append("NoShift")
    return(others)
#finds which date is earlier in string MM/DD format
def dateBeforeOrEqual(date1, date2):
    """Return True when date1 (an "MM/DD" string) falls on or before date2."""
    month1, day1 = (int(part) for part in date1.split('/'))
    month2, day2 = (int(part) for part in date2.split('/'))
    # Tuple comparison orders by month first, then day -- exactly the
    # month/day cascade the original spelled out with if/elif.
    return (month1, day1) <= (month2, day2)
#create string files for easy pasting into Google forms
def createStrings():
    """Dump netIDs, email addresses, section titles, and meeting dates into
    text files under stringOutputs/ for copy-pasting into Google forms."""
    stfD = pd.read_csv(staffDatabaseFilename, dtype=str, index_col=False)
    secD = pd.read_csv(sectionDatabaseFilename, dtype=str, index_col=False)
    with open("stringOutputs/staffIDs.txt", 'w') as f:
        netIDs = stfD['NetID'].values
        netIDs.sort()
        f.write('\n'.join(netIDs))
    with open("stringOutputs/emails.txt", 'w') as f:
        addresses = [netID + "@msu.edu" for netID in stfD['NetID'].values]
        f.write('; '.join(addresses))
    with open("stringOutputs/sectionTitles.txt", 'w') as f:
        # dict.fromkeys dedupes while preserving first-seen order
        f.write('\n'.join(dict.fromkeys(secD['Section'].values)))
    with open("stringOutputs/periodDates.txt", 'w') as f:
        f.write('\n'.join(dict.fromkeys(secD['Date'].values)))
#Create the content of form-letter emails to be sent based on the request details and the chosen approval status
def generateEmails(requestList):
    """Build [recipient, subject, message] triples for each request, then pass
    them to sendEmails() (currently with actuallySend=False, i.e. debug mode).

    Each request is a sequence:
        [timestamp, requestorNetID, section(s), date(s), replacementNetID,
         reason, status, statusReason]
    where sections/dates may be ';'-separated lists and status is one of
    APP/ACC/REJ/CAN.  Up to three emails are generated per request: one to the
    requestor, one to the replacement (if the change went through), and one to
    the helproom GA (for helproom sections).
    """
    emails=[]
    for req in requestList:
        #Unpack the request into named variables (for readability in the string formatting below)
        timestamp = req[0]
        requestor = IDToName(req[1]).strip("*")
        section = req[2]
        date = req[3]
        replacement = IDToName(req[4]).strip("*")
        reason = req[5]
        status = req[6]
        statusReason = req[7]
        #Check if a replacement was specified at all
        if replacement!="":
            replaced=True
        else:
            replaced=False
        #Unpack sections and dates to always be a list
        if ";" in section:
            sections = section.split(";")
        else:
            sections=[section]
        if ";" in date:
            dates = date.split(";")
        else:
            dates=[date]
        times=[]
        locations=[]
        if status=="APP" or status=="ACC": #For now, don't tell the ULAs that their acceptance was begrudging so they don't try and game the system by lying about the real reason. We can change this later if desired.
            status="approved"
            changed=True #Our database entries were changed
            for s in sections:
                times.append(getTimeFromSection(s))
                locations.append(getLocationFromSection(s))
        elif status=="REJ" or status=="CAN":
            if status == "REJ":
                status="rejected"
            else:
                status="cancelled"
            changed=False #Our database entries were NOT changed
            for s in sections:
                #Rejected/cancelled requests may reference shifts that no longer
                #exist, so fall back to a placeholder rather than crashing.
                try:
                    times.append(getTimeFromSection(s))
                except:
                    times=["INVALID DATE/SECTION"]
                try:
                    locations.append(getLocationFromSection(s))
                except:
                    locations=["INVALID DATE/SECTION"]
        else:
            print ('ERROR: Status of request is "'+str(status)+'" instead of APP/ACC/REJ/CAN')
            return
        recipient = req[1]+"@msu.edu"
        subject = "Your request for substitution on "+date+" has been "+status+"."
        #Expand terse section codes into human-readable descriptions.
        sectionStrings=[]
        for i in range(len(sections)):
            if "HR" in sections[i]:#helproom
                sectionStrings.append(sections[i].replace("_HR"," helproom (hour ")+")")
                #NOTE(review): these replace EVERY occurrence of the capital
                #letter in the string; relies on helproom codes being exactly
                #one day letter plus "_HR<n>" -- confirm no other capitals occur.
                sectionStrings[i]=sectionStrings[i].replace("M","Monday")
                sectionStrings[i]=sectionStrings[i].replace("T","Tuesday")
                sectionStrings[i]=sectionStrings[i].replace("W","Wednesday")
                sectionStrings[i]=sectionStrings[i].replace("R","Thursday")
                sectionStrings[i]=sectionStrings[i].replace("F","Friday")
                sectionStrings[i]=sectionStrings[i].replace("S","Sunday")
            else:
                sectionStrings.append(sections[i].replace("Sec","Section"))
        #Email #1: notify the requestor of the decision.
        message = "Hello "+requestor+","
        message+="\n\nYour request to be substituted out of ["+" and ".join(sectionStrings)+"] at ["+" and ".join(times)+"] on ["+" and ".join(dates)+'] because of "'+reason+'" was recieved at ['+timestamp+"]."
        if replaced:
            message+="\nYou specified "+replacement+" as your replacement"
            if changed:
                message+=" and they have also recieved an automatically generated email notifying them of the switch."
            else:
                message+="."
        else:
            message+="\nYou did not specify a replacement."
        message+="\n\nThis request has been reviewed by the professors and *"+status+"*"
        if statusReason != "INST. REASON":
            message+=' for the given reason of: "'+statusReason+'"'
        if changed:
            message+=" and the corresponding changes have been made in our calander.\nIf all of the above information is correct, no further action is necessary. If any of this information is incorrect, please contact us IMMEDIATELY on Slack so we can correct it."
        else:
            message+= " and no changes have been made in our calander. If you did not submit this request, please contact us on Slack as there may be an error in our system."
        message+="\n\nThis email was automatically generated. Do not reply or send email to this address as it will not be seen. All communication should occur in Slack or by emailing cse102@msu.edu"
        emails.append([recipient,subject,message])
        #Also send email to new ULA if their subbing IN was approved/accepted
        if changed and replaced:
            recipient = req[4]+"@msu.edu"
            subject = "You have been scheduled to substitute on "+date+"."
            message = "Hello "+replacement+","
            message+="\n\nYou have been scheduled to substitute for "+requestor+" in ["+" and ".join(sectionStrings)+"] at ["+" and ".join(times)+"] on ["+" and ".join(dates)+"] at ["+" and ".join(locations)+"]"
            message+="\n\n"+requestor+" has specified that you have already agreed to this by submitting the request with you as a replacement and thus our calander has been changed."
            message+="\nIf all of the above information is correct, no further action is necessary. If any of this information is incorrect, please contact us IMMEDIATELY on Slack so we can correct it."
            message+="\n\nThis email was automatically generated. Do not reply or send email to this address as it will not be seen. All communication should occur in Slack or by emailing cse102@msu.edu"
            emails.append([recipient,subject,message])
        #Also send email to GA if their helproom ULAs have been changed.
        if changed:
            for s in sections:
                if s in ["M_HR1","M_HR2","T_HR1","T_HR2","W_HR1","W_HR2","R_HR1","R_HR2","F_HR1","F_HR2","S_HR1","S_HR2"]:
                    for date in dates:
                        #First entry of the shift's name list is presumably the GA -- TODO confirm.
                        names=getAllNamesFromSection(s,date)[0]
                        print(names,names!="NoShift")
                        if names!="NoShift":
                            #NOTE(review): this recipient is a bare netID while every
                            #other recipient gets "@msu.edu" appended -- confirm intended.
                            recipient = nameToID(names[0])
                            subject="Change in helproom staff on "+date
                            message="There has been a substitution in your helproom section: "+s+".\nYour ULAs on "+date+" are "+", ".join(names[1:])
                            emails.append([recipient,subject,message])
    #Send emails (or print to terminal if debugging and actuallySend == False)
    sendEmails(emails,actuallySend=False)
#dummyParam is to make really sure that the author of the code that calls this function really intends to actually send emails (by forcing them to use the keyword)
def sendEmails(emails,dummyParam="DUMMY",actuallySend=False):
    """Send (or, by default, just print) a batch of emails.

    emails: list of [address, subject, message] triples.
    dummyParam: positional guard -- callers who pass actuallySend positionally
        by mistake will clobber this and be refused.
    actuallySend: must be passed as a keyword and set True to really send
        mail via Gmail SMTP; otherwise everything is echoed to the terminal.
    """
    #Catch incorrect use of this method by someone who doesn't understand the args.
    if dummyParam!="DUMMY":
        print("DON'T USE THE helperFunctions.sendEmails() method unless you know what you're doing! This can send actual emails to people's inbox!")
        return
    if actuallySend:
        #https://www.geeksforgeeks.org/send-mail-gmail-account-using-python/
        # creates SMTP session
        s = smtplib.SMTP('smtp.gmail.com', 587)
        # start TLS for security
        s.starttls()
        # Authentication
        # SECURITY NOTE(review): app password hard-coded in source control --
        # rotate it and load from an environment variable or secrets store.
        s.login("cse102msu@gmail.com", "whol ydag otqa hxps")
        print("BEGINNING EMAIL SENDING!")
        for email in emails:
            address = email[0]
            subject = email[1]
            message = email[2]
            messageObj = 'Subject: {}\n\n{}'.format(subject, message)
            # sending the mail
            s.sendmail("cse102msu@gmail.com", address, messageObj)
        # terminating the session
        print("Emails sent!")
        s.quit()
    else:
        print('\n!!!PRINTING TO TERMINAL INSTEAD TO PREVENT ACCIDENTALY EMAIL SPAM!!!:\n\t(Use the argument "acutallySend=True" in "sendEmails" function to disable this.)')
        for email in emails:
            print("\n"+"-"*80+"\nEmail would be sent to "+email[0]+":\nSubject: "+email[1]+"\nMessage:\n"+email[2]+"\n"+"-"*80)
#Get the raw data from the Google Forms API
#FYI: The first part of this code dealing with connection/authentication was not written by me and is largely a black box.
#This function only adds requests that are more recent than the last time it was run, so that the original data in the Google cloud never needs to be cleared
def getForms(subRequestsFilename):
    """Download substitution-request form responses and append the new ones
    to the CSV at subRequestsFilename.

    Uses token.json / credentials.json for OAuth, lastUpdatedToken.txt to
    remember when responses were last fetched, and rewrites both the CSV
    (sorted by timestamp) and the token file on success.
    """
    #BLACKBOX AUTHENTICATION MAGIC
    #--------------------------------------------------------------------------
    #Requires some installation/setup https://developers.google.com/forms/api/quickstart/python
    SCOPES = ["https://www.googleapis.com/auth/forms.responses.readonly"]
    DISCOVERY_DOC = "https://forms.googleapis.com/$discovery/rest?version=v1"
    creds = None
    # The file token.json stores the user's access and refresh tokens, and is
    # created automatically when the authorization flow completes for the first
    # time.
    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)
        store = file.Storage('token.json')
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            #No usable token: run the interactive browser consent flow.
            flow = InstalledAppFlow.from_client_secrets_file('credentials.json', SCOPES)
            creds = flow.run_local_server(port=0)
        #Save the credentials for the next run
        with open('token.json', 'w') as token:
            token.write(creds.to_json())
    #service = discovery.build('forms', 'v1', http=creds.authorize(Http()), discoveryServiceUrl=DISCOVERY_DOC, static_discovery=False)
    service = build('forms', 'v1', credentials=creds)
    # gets the responses of your specified form:
    form_id = '1x-8fkuMAcQlTl36SdsbCG0tfClKAcvNshnV8L_Hl904'
    result = service.forms().responses().list(formId=form_id).execute()
    #END OF BLACKBOX AUTHENTICATION MAGIC
    #--------------------------------------------------------------------------
    subs = pd.read_csv(subRequestsFilename,dtype=str)
    #Check when the last time the data was downloaded
    with open("lastUpdatedToken.txt",'r') as f:
        line = f.readline()
        if line != "":
            prevTime=datetime.strptime(line,"%Y-%m-%d %H:%M:%S.%f")
        else:
            prevTime=datetime.strptime("1975-01-01 01:01:01.000000","%Y-%m-%d %H:%M:%S.%f")#If the file was blank, (such as by just being created) use an aribtrary very early date
    data=result["responses"]
    #Unpack the ugly data structure that Google Forms returns
    #NOTE(review): the hex keys below are form-specific question IDs -- they
    #must be updated if the Google form is ever rebuilt.
    for req in data:
        try:
            reason=req["answers"]["22a5ae9b"]["textAnswers"]["answers"][0]["value"]
        except KeyError:#No reason specified
            reason = ""
        requestor=req["answers"]["7bb6a9dd"]["textAnswers"]["answers"][0]["value"]
        dates=""
        for i in req["answers"]["11d3b4f8"]["textAnswers"]["answers"]:
            dates+=i["value"]+";"
        dates=dates[:-1]
        try:
            replacement=req["answers"]["30178530"]["textAnswers"]["answers"][0]["value"]
        except KeyError:#no replacement specified
            replacement=""
        sections=""
        for i in req["answers"]["5684403f"]["textAnswers"]["answers"]:
            sections+=i["value"]+";"
        sections=sections[:-1]
        timeStr = req["createTime"][:-1].replace("T"," ")+"000"
        #The timestamp needs formatting adjustment and a time-zone shift to EST
        #NOTE(review): the fixed -4h offset is EDT, not EST -- this will be off
        #by an hour outside daylight-saving time; confirm acceptable.
        timeStamp=datetime.strptime(timeStr, '%Y-%m-%d %H:%M:%S.%f')-timedelta(hours=4,minutes=0)
        #If the request is more recent than our last download, then our database doesn't yet 'know' about it and it needs to be added.
        if timeStamp>prevTime:
            reqDict={"Timestamp": [timeStr], "Requestor": [requestor], "Section": [sections], "Dates": [dates], "Replacement": [replacement], "Reason": [reason]}
            subs=pd.concat([subs,pd.DataFrame(reqDict)],ignore_index=True)
    subs.sort_values(by=["Timestamp"],inplace=True)
    #Write the updated request list to file
    subs.to_csv(subRequestsFilename,index=False,index_label=False)
    #Write the timestamp to the token for checking when this function was last run
    with open("lastUpdatedToken.txt",'w') as f:
        f.write(str(datetime.now()))