64 lines
1.6 KiB
Python
64 lines
1.6 KiB
Python
|
|
import pathlib
|
|
import psycopg2
|
|
import os
|
|
|
|
# Connection settings come from the environment. DB_HOST and DB_PASSWORD
# have no fallback (they are None when unset); the user and the database
# name both fall back to 'scribblers'.
environment = os.environ
dbhost = environment.get('DB_HOST')
dbuser = environment.get('DB_USER', 'scribblers')
dbpassword = environment.get('DB_PASSWORD')
dbname = environment.get('DB_NAME', 'scribblers')

print("Attempting to connect")

# Gather the keyword arguments in one place so the connect call stays short.
connection_settings = {
    "host": dbhost,
    "database": dbname,
    "user": dbuser,
    "password": dbpassword,
    "connect_timeout": 3,
}
conn = psycopg2.connect(**connection_settings)

# Single module-level cursor shared by every statement in this script.
cur = conn.cursor()
|
|
|
|
def newgroup(name,parentid):
    """Insert one row into ``wgroups`` and return the id reported back.

    Args:
        name: Value stored in the ``name`` column.
        parentid: Value stored in the ``parentid`` column; ``None`` stores
            an SQL NULL instead.

    Returns:
        The first column of the row produced by ``RETURNING id``.

    The statement runs on the module-level cursor ``cur``; this function
    does not commit.
    """
    print("Creating group",name)

    # Choose the statement and its parameters first, then execute once.
    if parentid is None:
        statement = "INSERT INTO wgroups (name,parentid) VALUES (%s,NULL) RETURNING id"
        values = (name,)
    else:
        statement = "INSERT INTO wgroups (name,parentid) VALUES (%s,%s) RETURNING id"
        values = (name, parentid)

    cur.execute(statement, values)
    inserted_row = cur.fetchone()
    return inserted_row[0]
|
|
|
|
def addwords(filename,groupid):
    """Insert every word listed in *filename* into ``words`` and commit.

    The file is read as UTF-8, one word per line. Surrounding whitespace is
    stripped and lines that are empty after stripping are skipped, so a
    trailing blank line does not create an empty word.

    Args:
        filename: Path of the text file to read.
        groupid: Value stored in the ``groupid`` column of each inserted row.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.

    Uses the module-level cursor ``cur`` and connection ``conn``.
    """
    print("Adding words of files",filename,"to group id",groupid)
    # Explicit encoding: the platform default differs between systems.
    with open(filename, encoding="utf-8") as file:
        for line in file:
            word = line.strip()
            if not word:
                # Blank line: nothing to store.
                continue
            cur.execute("INSERT INTO words (str,groupid) VALUES (%s,%s)",(word,groupid))
    conn.commit()
|
|
|
|
# Empty both tables before re-importing. Words are removed first, then every
# group's parentid is cleared before the groups themselves are deleted
# (presumably so parent references cannot block the delete - confirm
# against the schema).
print("Cleaning the database")
for statement in (
    "DELETE FROM words",
    "UPDATE wgroups SET parentid=NULL",
    "DELETE FROM wgroups",
):
    cur.execute(statement)
conn.commit()
|
|
|
|
def discover_groups(root):
    """Map each nesting depth to the group paths found below *root*.

    Every path matching ``*.txt`` under *root* (searched recursively)
    contributes its own path, without the ``.txt`` ending, and each of its
    ancestor directories. Paths are tuples of components relative to *root*.

    Args:
        root: ``pathlib.Path`` of the directory to scan.

    Returns:
        A dict whose key ``d`` holds the set of component tuples of length
        ``d + 1``; e.g. ``a/b.txt`` yields ``{0: {("a",)}, 1: {("a", "b")}}``.
    """
    ending_length = len(".txt")
    found = {}
    for item in root.rglob("*.txt"):
        relative = item.relative_to(root)
        # Use the path's own components rather than splitting its string
        # form on "/", which does not work where the separator is "\".
        components = relative.parts[:-1] + (relative.name[:-ending_length],)
        for depth in range(len(components)):
            found.setdefault(depth, set()).add(components[:depth + 1])
    return found


folder = pathlib.Path(".")
groups = discover_groups(folder)
|
|
|
|
# Create one wgroups row per discovered group and load its word file when
# one exists. groupids maps each group's component tuple to its database id.
groupids = {}
# Walk the levels in ascending order so a parent's id is always recorded
# before any of its children look it up.
for level in sorted(groups):
    for group in groups[level]:
        name = group[-1]
        filename = "/".join(group) + ".txt"
        # Top-level groups (a single component) have no parent.
        parentid = groupids[group[:-1]] if len(group) > 1 else None
        newid = newgroup(name,parentid)
        groupids[group] = newid
        # Groups that exist only as directories have no word file.
        if pathlib.Path(filename).is_file():
            addwords(filename,newid)

# addwords() is the only place that commits inside the loop, so groups
# created after its last call would otherwise be lost when the connection
# is closed.
conn.commit()
|
|
|
|
|
|
# Release the cursor first, then the connection it was created from.
for resource in (cur, conn):
    resource.close()
|