63 lines
1.6 KiB
Python

import pathlib
import psycopg2
import os
# Connection settings come from the environment. DB_USER and DB_NAME fall back
# to "scribblers"; DB_HOST and DB_PASSWORD are None when the variable is unset.
dbhost = os.getenv("DB_HOST")
dbuser = os.getenv("DB_USER", "scribblers")
dbpassword = os.getenv("DB_PASSWORD")
dbname = os.getenv("DB_NAME", "scribblers")

print("Attempting to connect")

# A single module-level connection and cursor are shared by everything below.
# connect_timeout is forwarded to libpq (documented there as seconds).
_connect_kwargs = {
    "host": dbhost,
    "database": dbname,
    "user": dbuser,
    "password": dbpassword,
    "connect_timeout": 3,
}
conn = psycopg2.connect(**_connect_kwargs)
cur = conn.cursor()
def newgroup(name, parentid):
    """Insert one row into ``wgroups`` and return its generated id.

    The insert runs on the module-level cursor ``cur`` and is not committed
    here; committing is left to the caller.

    Args:
        name: Value stored in the ``name`` column.
        parentid: Value stored in the ``parentid`` column. ``None`` stores a
            literal SQL NULL (top-level group).

    Returns:
        The first column of the row produced by ``RETURNING id``.
    """
    print("Creating group", name)

    # Pick the statement and its parameters first, then run a single execute.
    if parentid is not None:
        statement = "INSERT INTO wgroups (name,parentid) VALUES (%s,%s) RETURNING id"
        values = (name, parentid)
    else:
        statement = "INSERT INTO wgroups (name,parentid) VALUES (%s,NULL) RETURNING id"
        values = (name,)

    cur.execute(statement, values)
    row = cur.fetchone()
    return row[0]
def addwords(filename, groupid):
    """Insert every word listed in a text file into ``words`` for one group.

    The file is read line by line; each line is stripped of surrounding
    whitespace and inserted as one row through the module-level cursor
    ``cur``. The inserts are committed on the module-level connection ``conn``
    once the whole file has been processed.

    Args:
        filename: Path of the word list to read, one word per line.
        groupid: Value stored in the ``groupid`` column of every inserted row.
    """
    print("Adding words of files", filename, "to group id", groupid)

    # Decode explicitly as UTF-8 so the imported words do not depend on the
    # locale of the machine running the script.
    with open(filename, encoding="utf-8") as wordfile:
        for raw_line in wordfile:
            word = raw_line.strip()
            if not word:
                # Blank or whitespace-only lines (e.g. a trailing newline at
                # the end of the file) would otherwise become empty words.
                continue
            cur.execute(
                "INSERT INTO words (str,groupid) VALUES (%s,%s)",
                (word, groupid),
            )

    # One commit per file: all words of a list become visible together.
    conn.commit()
# Rebuild the word database from the *.txt files found below the current
# directory. Every path component becomes a group: directories are parent
# groups, and a file "a/b.txt" yields group "b" under group "a" whose words
# are the lines of that file.
print("Cleaning the database")
try:
    # Words are deleted before the groups they refer to.
    cur.execute("DELETE FROM words")
    cur.execute("DELETE FROM wgroups")
    conn.commit()

    folder = pathlib.Path(".")

    # groups[depth] is the set of group paths (tuples of names) of that depth;
    # depth 0 holds the top-level groups.
    groups = {}
    for item in folder.rglob("*.txt"):
        # Use pathlib's own components instead of splitting on "/", so the
        # scan also works where the path separator differs. The last
        # component loses its 4-character ".txt" extension.
        parts = item.parts[:-1] + (item.name[:-4],)
        for depth in range(len(parts)):
            groups.setdefault(depth, set()).add(parts[:depth + 1])

    # Maps a group path to the id of the row created for it.
    groupids = {}
    # Shallow levels first, so a parent id always exists before its children
    # are inserted. Sorting within a level makes the generated ids
    # reproducible; raw set order changes from run to run.
    for depth in sorted(groups):
        for group in sorted(groups[depth]):
            parentid = groupids[group[:-1]] if len(group) > 1 else None
            newid = newgroup(group[-1], parentid)
            groupids[group] = newid

            # A group may be a directory only, a word file only, or both;
            # words are loaded only when the matching file exists.
            wordfile = pathlib.Path(*group[:-1], group[-1] + ".txt")
            if wordfile.is_file():
                addwords(str(wordfile), newid)

    # Make sure group rows are committed even if no word file was loaded
    # after the last insert.
    conn.commit()
finally:
    # Release the cursor and connection even when the import fails midway.
    cur.close()
    conn.close()