"""Rebuild the word database from the ``.txt`` files under the current directory.

Every path component leading to a ``*.txt`` file (directories as well as the
file name without its extension) becomes a row in ``wgroups``, linked to its
parent component through ``parentid``.  Each line of a ``.txt`` file becomes a
row in ``words`` attached to the group named after that file.

Connection settings are read from the environment variables ``DB_HOST``,
``DB_USER`` (default ``scribblers``), ``DB_PASSWORD`` and ``DB_NAME``
(default ``scribblers``).

WARNING: running this script deletes all existing rows of ``words`` and
``wgroups`` before importing.
"""
import os
import pathlib

import psycopg2

dbhost = os.environ.get('DB_HOST')
dbuser = os.environ.get('DB_USER', 'scribblers')
dbpassword = os.environ.get('DB_PASSWORD')
dbname = os.environ.get('DB_NAME', 'scribblers')

print("Attempting to connect")
conn = psycopg2.connect(
    host=dbhost,
    database=dbname,
    user=dbuser,
    password=dbpassword,
    connect_timeout=3,
)
cur = conn.cursor()


def newgroup(name, parentid):
    """Insert a row into ``wgroups`` and return its generated id.

    Args:
        name: Value stored in the ``name`` column.
        parentid: Id of the parent group, or ``None`` for a top-level group.

    Returns:
        The ``id`` reported by the ``RETURNING id`` clause.

    The insert is not committed here; the caller is responsible for that.
    """
    print("Creating group", name)
    # psycopg2 adapts Python None to SQL NULL, so a single parameterized
    # statement covers both top-level and nested groups.
    cur.execute(
        "INSERT INTO wgroups (name,parentid) VALUES (%s,%s) RETURNING id",
        (name, parentid),
    )
    return cur.fetchone()[0]


def addwords(filename, groupid):
    """Insert every line of *filename* into ``words`` and commit.

    Args:
        filename: Path of the text file to read (``str`` or path-like).
        groupid: Id of the ``wgroups`` row the words are attached to.

    Each line is stripped of surrounding whitespace before it is stored.
    The file is decoded with the platform's default text encoding.
    """
    print("Adding words of files", filename, "to group id", groupid)
    with open(filename) as file:
        for line in file:
            cur.execute(
                "INSERT INTO words (str,groupid) VALUES (%s,%s)",
                (line.strip(), groupid),
            )
    conn.commit()


try:
    print("Cleaning the database")
    cur.execute("DELETE FROM words")
    # Detach every group from its parent before deleting the groups.
    cur.execute("UPDATE wgroups SET parentid=NULL")
    cur.execute("DELETE FROM wgroups")
    conn.commit()

    folder = pathlib.Path(".")

    # groups[depth] holds every distinct path prefix with depth + 1
    # components, e.g. "animals/pets/cats.txt" contributes ("animals",),
    # ("animals", "pets") and ("animals", "pets", "cats").
    groups = {}
    for item in folder.rglob("*.txt"):
        # Path.parts splits on the platform's own separator, unlike a
        # hard-coded "/".  The last component loses its 4-character ".txt".
        parts = item.parts
        arr = parts[:-1] + (parts[-1][:-4],)
        for depth in range(1, len(arr) + 1):
            groups.setdefault(depth - 1, set()).add(arr[:depth])

    # Create groups level by level so a parent's id is always known before
    # its children are inserted.  Sorting makes the assigned ids reproducible
    # from one run to the next.
    groupids = {}
    for depth in sorted(groups):
        for group in sorted(groups[depth]):
            name = group[-1]
            wordfile = pathlib.Path(*group[:-1], name + ".txt")
            parentid = groupids[group[:-1]] if len(group) > 1 else None
            newid = newgroup(name, parentid)
            groupids[group] = newid
            if wordfile.is_file():
                addwords(wordfile, newid)

    # addwords() commits only after importing a file; commit once more so
    # groups created after the last word file are not discarded on close.
    conn.commit()
finally:
    # Release the cursor and connection even when the import fails part-way.
    cur.close()
    conn.close()