diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..4699257
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,8 @@
+FROM python:3.12
+
+RUN pip install psycopg2
+
+COPY . /files
+WORKDIR /files
+
+ENTRYPOINT ["python3", "todb.py"]
diff --git a/todb.py b/todb.py
new file mode 100644
index 0000000..c3081f5
--- /dev/null
+++ b/todb.py
@@ -0,0 +1,90 @@
+"""Rebuild the word database from the ``*.txt`` word lists in this folder.
+
+Every ``a_b_c.txt`` file names a path in the group hierarchy: group ``a``
+contains ``b``, which contains ``c``, and the lines of the file are the words
+of ``c``.  The ``words`` and ``wgroups`` tables are emptied and refilled in a
+single transaction.
+"""
+
+import os
+import pathlib
+
+import psycopg2
+
+dbhost = os.environ.get('DB_HOST', 'scribblers')
+dbuser = os.environ.get('DB_USER', 'scribblers')
+# No default on purpose: a missing password aborts the run with a KeyError.
+dbpassword = os.environ['DB_PASSWORD']
+dbname = os.environ.get('DB_NAME', 'scribblers')
+
+
+def newgroup(name, parentid):
+    """Insert a group through the module-level cursor and return its id.
+
+    ``parentid`` is ``None`` for a top-level group; psycopg2 sends ``None``
+    as SQL ``NULL``, so one statement covers both cases.
+    """
+    print("Creating group", name)
+    cur.execute(
+        "INSERT INTO wgroups (name,parentid) VALUES (%s,%s) RETURNING id",
+        (name, parentid),
+    )
+    return cur.fetchone()[0]
+
+
+def addwords(filename, groupid):
+    """Insert each non-blank line of ``filename`` as a word of ``groupid``.
+
+    Nothing is committed here; the surrounding transaction decides that.
+    """
+    print("Adding words of files", filename, "to group id", groupid)
+    # Explicit encoding so the result does not depend on the host locale.
+    with open(filename, encoding="utf-8") as file:
+        for line in file:
+            word = line.strip()
+            # A blank line would otherwise be stored as an empty word.
+            if word:
+                cur.execute(
+                    "INSERT INTO words (str,groupid) VALUES (%s,%s)",
+                    (word, groupid),
+                )
+
+
+print("Attempting to connect")
+conn = psycopg2.connect(
+    host=dbhost,
+    database=dbname,
+    user=dbuser,
+    password=dbpassword,
+    connect_timeout=3,
+)
+
+try:
+    # "with conn" commits on success and rolls back on an exception, so a
+    # failed import leaves the previous database contents untouched.
+    with conn, conn.cursor() as cur:
+        print("Cleaning the database")
+        cur.execute("DELETE FROM words")
+        cur.execute("DELETE FROM wgroups")
+
+        # groups[depth] is the set of group paths (name tuples) of that depth.
+        groups = {}
+        for item in pathlib.Path(".").glob("*.txt"):
+            arr = item.stem.split("_")
+            for depth in range(1, len(arr) + 1):
+                groups.setdefault(depth, set()).add(tuple(arr[:depth]))
+
+        # Shallowest paths first: a parent must have its id before any child
+        # is inserted.  Sorting the paths also makes the run reproducible.
+        groupids = {}
+        for depth in sorted(groups):
+            for group in sorted(groups[depth]):
+                parentid = groupids[group[:-1]] if len(group) > 1 else None
+                newid = newgroup(group[-1], parentid)
+                groupids[group] = newid
+                filename = "_".join(group) + ".txt"
+                if pathlib.Path(filename).is_file():
+                    addwords(filename, newid)
+finally:
+    # Always release the connection, including when the import failed.
+    conn.close()