open Printf;;
open Str;;
open Nativeint;;
open Buffer;;

(*###############################################################*)
(*#                           Binarint                          #*)
(*###############################################################*)

(* On utilise du little endian *)
(* Le MSB est écrit en dernier (le plus à droite) *)
(* Puisque les listes en caml sont déjà l-endian *)
type binarint = nativeint;;
type bit = Zero | Un;;
let isize = Sys.int_size+1;;

(* lsl -> vers MSB *)
(* lsr -> vers LSB *)


let (<<<) = fun a b -> shift_right a b;;

let (>>>) = fun a b -> shift_left a b;;

let ithBit (n:binarint) i = if (rem (n <<< i) 2n=0n) then Zero else Un;;

let binarintToString (n:binarint) = let s = String.make isize '-' in
	for i=0 to isize-1 do
		Bytes.set s i (if (rem (n <<< i) 2n=0n) then '0' else '1')
	done;
	s;;


let parseBinarint (str:string) = let l = String.length str in
	if l > isize then raise (Invalid_argument("La chaine de caractères spécifiée est trop longue pour etre celle d'un binarint"))
	else let s = ref 0n and p = ref 1n in
		for i=0 to l-1 do
			s := add !s (if str.[i]='1' then !p else 0n);
			p := mul 2n (!p)
		done;
		!s;;

(*************** Filtres ***************)
let makeFilters () = let lsf = Array.make (isize+1) 0n and msf = Array.make (isize+1) 0n in
	for i=1 to isize do
		lsf.(i) <- add ((lsf.(i-1)) >>> 1) 1n
	done;
	for i=1 to isize do
		msf.(isize-i) <- lognot lsf.(i)
	done;
	(lsf,msf);;
let lsf,msf = makeFilters();;
let (<||) = fun n i -> (logand lsf.(i) n);;
let (>||) = fun n i -> (logand msf.(i) n);;


(*###############################################################*)
(*#                          Bitstring                          #*)
(*###############################################################*)

type bitstring = int * binarint list;;


let bitstringToString (bs:bitstring) = let i,l = bs in match l with
	| [] -> ""
	| last::l0 -> (List.fold_left (fun deja toPrint -> deja^(binarintToString toPrint)) "" l0)
						^(String.sub (binarintToString last) 0 i);;

let bitSize bs = match bs with
	| (i,[]) -> 0
	| (i,e::s) -> (List.length s)*isize + i;;


let rec ajoutPartial (lsbs:bitstring) (el:binarint) (eli:int) :bitstring=
	let (lsbsi,lsbsl)=lsbs in
	match lsbsl with
		| [] -> (eli,[el])
		| lsbse::lsbs0 -> 
			let returni = ((lsbsi + eli-1) mod isize) +1 in
			let returnl = 
				let intersec = logor (lsbse <|| lsbsi) ((el <|| (min eli (isize-lsbsi))) >>> lsbsi) in
				(if (lsbsi+eli) > isize
				then ((el<<<(isize-lsbsi) <|| (eli+lsbsi-isize))::intersec::lsbs0)
				else (intersec::lsbs0))
			in (returni,returnl);;

let rec ajoutFull (lsbs:bitstring) (el:binarint):bitstring =
	let (lsbsi,lsbsl)=lsbs in
	match lsbsl with
		| [] -> (isize,[el])
		| lsbsle::lsbsl0 -> 
			let reste = ((el <<< (isize-lsbsi)) <|| lsbsi) in
			let corps = (logor ((el >>> (isize-lsbsi)) >|| (isize-lsbsi)) (lsbsle <|| lsbsi)) in
				(lsbsi,reste::corps::lsbsl0);;
let concat (lsbs:bitstring) (msbs:bitstring) = (* Efficace si msbs est court *)
	let (msbsi,msbsl) = msbs in
	match msbsl with
		| [] -> lsbs (* On ne rajoute rien *)
		| msbsle::msbsl0 -> 
			let fullAdded = List.fold_left ajoutFull lsbs msbsl0 in
			ajoutPartial fullAdded msbsle msbsi;;


(*###############################################################*)
(*#                          Huffmann                           #*)
(*###############################################################*)

type motPondere = string * int;;

(* Un huffarbre n'est valide que si *)
(* 1 - Pour tout Noeud(p,a,b), p=poids(a)+poids(b) *)
(* 2 - Pour tout Noeud(p,a,b), poids(a)<=poids(b) *)
type huffarbre = Feuille of motPondere | Noeud of (int * huffarbre * huffarbre);;

(* On suppose que tous les huffarbres rencontrés seront valides *)
(* Il suffit ensuite de vérifier que les fonctions préservent la validité *)

let poids arb	= match arb with
	| Feuille((s,i)) -> i
	| Noeud(i,a,b) -> i;;

(* Trie le premier élément d'une liste avec f fonction de comparaison*)
let rec triPremierListe f lst =
		match lst with
			| [e] -> [e]
			| [] -> []
			| e1::e2::s ->if (f e1)>(f e2)
						then e2::(triPremierListe f (e1::s))
						else lst;;

(* Renvoie le triplet ordonné (pour l'ordre des poids des noeuds) de l'ensemble {a,b,c}*)
let sortThreeHuffarbres (a,b,c) = let i=poids a and j = poids b and k = poids c in 
		if i>=j && i>=k then (b,c,a) else
		if j>=i && j>=k then (a,c,b) else
		(*if k>=j and k>=i then*) (a,b,c);;


(* Fusionne deux noeuds *)
let rec mergeNodes n1 n2 = if (poids n1)>(poids n2) then mergeNodes n2 n1 else
let masseTotale = (poids n1) + (poids n2) in (* Conservation de la masse*)
	match (n1,n2) with
	| Feuille((s1,i1)),Feuille((s2,i2)) -> Noeud(masseTotale,n1,n2)
	| Noeud(p,a,b),Feuille(m) -> Noeud(masseTotale,mergeNodes a b,n2) (*les deux plus lourds sont nécessairement b et m*)
	| Feuille(m),Noeud(p,a,b) ->
		(* On a trois éléments à transformer en deux (a,b,n1).
		 On fusionne les deux de poids le plus faible puis on les range tel que l'ordre soit préservé*)
		let (ap,bp,cp) = (sortThreeHuffarbres (a,b,n1)) in (* On a cp qui est le plus lourd *)
		let nouveau = (mergeNodes ap bp) in
		if (poids nouveau)>=(poids cp)  then Noeud(masseTotale,cp,nouveau)
												else Noeud(masseTotale,nouveau,cp)
	| Noeud(p1,a1,b1),Noeud(p2,a2,b2) -> Noeud(masseTotale,n1,n2);;(* On a nécessairement p1>p2 *)


let orderedMerge n1 n2 = if (poids n1)>=(poids n2) then mergeNodes n2 n1 else mergeNodes n1 n2;;
let motMerger n m = orderedMerge n (Feuille(m));;

exception EmptyException;;
let intcompare i j = if i=j then 0 else if i<j then -1 else 1;;

let compareMots (l1:motPondere) (l2:motPondere) = let (s1,i1)=l1 and (s2,i2)=l2 in intcompare i1 i2;;

let construireArbre mots = let motsTrie = List.sort compareMots mots in
	match motsTrie with
		| [] -> raise EmptyException
		| e::s ->  List.fold_left motMerger (Feuille(e)) s;;


(*###############################################################*)
(*#                          Fichiers                           #*)
(*###############################################################*)

let read_file filename = 
	let lines = ref [] and chan = open_in filename in
			try
				while true; do
					lines := input_line chan :: !lines
				done; !lines
			with End_of_file ->
		close_in chan;
		List.rev !lines ;;
		
let filesize filename = let s = ref 0 and chan = open_in filename in
			try
				while true; do
					s := !s+(String.length (input_line chan))
				done; !s
			with End_of_file -> close_in chan;
		!s ;;

(*###############################################################*)
(*#                      WordMaker-input                       #*)
(*###############################################################*)
(*Fréquences de sinnogrammes: https://lingua.mtsu.edu/chinese-computing/statistics/char/list.php?Which=MO*)

let getMotsFromLine = Str.split (Str.regexp " ");;
(* Caractères qui, enlèvent l'espace à la fin d'un mot: ['- /\@&] exemple: chauve-souris*)
(* Caractères qui, enlèvent l'espace avant et en placent un après: [:.;,?!%] exemple: Quoi? Vraiment?*)

let ctrlchar = Char.chr 17 ;;
let ctrl str = (String.make 1 ctrlchar)^str;;

type schartype = SE|SB|SA|CW|SW|Maj|Min|Dig;;
let getCharType chr = let c=Char.code chr in
		match c with
		| c when List.mem c [39;45;47;92;64;38;95;124]            -> SE (* '-/\@&_| *)
		| c when List.mem c [40;91;123]                           -> SB (* ([{ *)
		| c when List.mem c [58;59;46;44;33;63;37;36;41;93;125]   -> SA (* :.;,?!%$)]} *)
		| c when List.mem c [34;35;42;43;60;61;62;94;96;126;9;10] -> CW (* "#*+<=>^`~ <Tab> <Line Feed> *)     (*"*)
		| 32                                                      -> SW (* <Space> *)
		| c when 65 <=c && c<=90  -> Maj
		| c when 97 <=c && c<=122 -> Min
		| c when 48 <=c && c<=57  -> Dig
		| _ -> CW;;
		
exception NotALetterException;;
let getMin chr = let ct = getCharType chr in
	String.make 1 (
	match ct with
		| Min|Dig -> chr
		| Maj -> Char.chr ((Char.code chr)+32)
		| _ -> raise NotALetterException
	);;

let getMaj chr = let ct = getCharType chr in
	String.make 1 (
	match ct with
		| Min -> Char.chr ((Char.code chr)-32)
		| _ -> chr
	);;
let getMajChar chr = let ct = getCharType chr in
	match ct with
		| Min -> Char.chr ((Char.code chr)-32)
		| _ -> chr;;
		

(***
(* Renvoie les caractères lc et rc de types respectifs lt et rt formatés pret au rajout *)
(* La nouvelle chaine concaténée est prete pour un nouveau mot.*)
let ajoutSchar ch lt lc rt rc =
	match (lt,rt) with
		| SE,_
;;
***)

(***
let getMotsFromLine str = let mots=ref []
	and lmot = ref ""
	and maj = ref false (* Tout est-il en majuscules *)
	and majed = ref false (* La premiere lettre est-elle en majuscule *) 
	and inMot = ref false
	and spaceBefore = ref false
	and spaceRequested = ref false
	and lc = nullchar
	and i = ref 0 in
while !i < (String.length str) do
	if !spaceRequested && getCharType.str[i] <> CW
	then begin
		(* Si on a demandé un espace et qu'il n'y en a pas *)
		(* Du coup on écris le caractère avec NOSPACE avant de continuer au traitement normal. *)
		(* Le cas ou la requete est satisfaite est dans le match ci-dessous *)
		(* Ben je ne suis pas un espace, alors bon ? on force avec NOSPACE*)
		inMot := false;
		mots := (String.make 1 !lc)::(ctrl "NOSPACE")::!mots;
		lmot := "";
		spaceBefore := false;
		spaceRequested := false (* Ben plus maintenant du coup *)
	end;
	
	match getCharType str.[i] in
		| SW -> (* Un espace en fait *)
			if !inMot
			then
				inMot := false;
				mots := !lmot::!mots;
				lmot := "";
				spaceBefore := true
			else 
				if !spaceRequested
				then
					mots := (String.make 1 !lc)::!mots;
					spaceRequested := false
					spaceBefore := true
					inMot := false
				else 
					###
		| SE ->
			if !inMot
			then
				lc := str.[i];
				inMot := false
			else
				###
		| SA ->
			if !inMot
			then
				lc := str.[i];
				inMot := false;
				spaceRequested := true
			else
				###
		| SB ->
			if !inMot
			then
				###
			else
				if !spaceBefore
				then
					mots := (String.make 1 str.[i])::!mots;
					spaceRequested := false;
					spaceBefore := false;
					inMot := false
				else
					###
		| CW ->
			if !inMot
			then
				(* Terminer le mot prématurement et rajouter le mot du caractère CW en espérant un espace *)
				inMot := false;
				mots := !lmot::(ctrl "NOSPACE")::(String.make 1 str.[i])::!mots;
				lmot := "";
				spaceBefore := false;
				spaceRequested := true;
			else
				if !spaceBefore
				then
					(* OK pour l'instant, on prépare le caractère en espérant un espace*)
					lc := str.[i];
					inMot := false;
					spaceRequested := true
				else
					(* D'après le if préléminaire, on a spaceRequested=false *)
					(* On a un CW avec une chaine qui devait etre finie *)
					###
		| Maj,Min,Dig -> 
			if !inMot 
			then
				spaceBefore := false;
				lmot := !lmot^str.[i]
			else
				match !lc with
					| SB | SE -> 
						mots := (String.make 1 !lc)::!mots;
						lc := nullchar
						spaceRequested := false
						spaceBefore := false
						inMot := true
					| _ ->
				###
				inMot := true
			
	;
	i := i+1
done;
;;**)

let finDeMot lst lmot majed fullmaj nospace =
	match (majed,fullmaj,nospace) with
		| (true,_,true) -> (ctrl "NOSPACE")::lmot::(ctrl "FULLMAJ")::lst
		| (true,_,false) -> lmot::(ctrl "MAJED")::lst
		| (false,true,true) -> (ctrl "NOSPACE")::lmot::(ctrl "FULLMAJ")::lst
		| (false,true,false) -> lmot::(ctrl "FULLMAJ")::lst
		| (false,false,true) -> (ctrl "NOSPACE")::lmot::lst
		| (false,false,false) -> lmot::lst ;;

let getMotsFromLine str = let mots=ref []
	and lmot = ref ""
	and fullmaj = ref false (* Tout est-il en majuscules *)
	and majed = ref false (* La premiere lettre est-elle en majuscule *)
	and dansmot = ref false (* As-t-on commencé un mot ? *) in
for i=0 to (String.length str-1) do
	match (getCharType str.[i]) with
		| Maj ->
			if !dansmot (* Si on ne commence pas un mot *)
			then
				if !fullmaj (* Si il n'y a que des majs jusque là *)
				then 
					begin
					majed := false; (* Il n'y a pas QUE la première lettre qui est en maj *)
					lmot := (!lmot)^(getMin str.[i]) (* On rajoute la lettre *)
					end
				else (* La précédente est une minuscule *)
					begin
					(* Donc on termine le mot et on met nospace *)
					mots := finDeMot !mots !lmot !majed !fullmaj true;
					(* Puis on commence un nouveau mot avec une majuscule *)
					lmot := getMin str.[i];
					fullmaj := true;
					majed := true;
					dansmot := true
					end
			else
				begin
				(* Ben on commence un mot avec une majuscule *)
				lmot := getMin str.[i];
				fullmaj := true;
				majed := true;
				dansmot := true
				end
		| Min ->
			if !dansmot (* Si on ne commence pas un mot *)
			then
				begin
				fullmaj := false; (* Il n'y a pas que des majuscules *)
				lmot := !lmot^(getMin str.[i]) (* On rajoute la lettre *)
				end
			else
				begin
				(* On commence un mot avec une minuscule *)
				lmot := getMin str.[i];
				majed := false;
				fullmaj := false;
				dansmot := true
				end
		(* | Dig -> On verra plus tard *)
		| SW ->
			if !dansmot
			then
				begin
				(* Ben on finit le mot avec un espace (normal quoi) *)
				mots := finDeMot !mots !lmot !majed !fullmaj false;
				dansmot := false;
				lmot := ""
				(* Et on passe, le caractère a été rajouté par l'absence de nospace *)
				end
			else
				begin
				(* On vire le précédent si c'était un NOSPACE, sinon, on rajoute un mot vide, sans NOSPACE*)
				match (!mots) with
					| e::l -> (* TODO #001 éviter de vider mots, préférer rajouter un booléen *)
						if e=(ctrl "NOSPACE") then mots := l else mots := ""::!mots
					| _ -> 
						mots := ""::!mots
				end
		| _ ->
			if !dansmot
			then
				begin
				(* On finit le mot, mais avec nospace, puisque le caractère est collé *)
				mots := finDeMot !mots !lmot !majed !fullmaj true;
				dansmot := false;
				lmot := "";
				(* Puis on rajoute le caractère, avec NOSPACE *)
				mots := (ctrl "NOSPACE")::(String.make 1 str.[i])::!mots (* TODO #001 *)
				end
			else
				begin
				(* Idem, mais on a pas le mot à finir *)
				mots := (ctrl "NOSPACE")::(String.make 1 str.[i])::!mots (* TODO #001 *)
				end
	done;
	mots := finDeMot !mots !lmot !majed !fullmaj false;
	
	!mots
;;

let getMotsFromFile filename = 
	let mots = ref [] and chan = open_in filename in
		try
			while true; do
				mots := (ctrl "NEWLINE")::((getMotsFromLine (input_line chan)) @ !mots)
			done; !mots
		with End_of_file ->
	close_in chan;
	!mots (* A l'envers mais on s'en fiche *);;

let getBytesFromFile filename = 
	let bits = ref (Bytes.create 0) and chan = open_in filename in
		try
			while true; do
				bits := Bytes.cat (Bytes.of_string (input_line chan))  !bits
			done; !bits
		with End_of_file ->
	close_in chan;
	!bits (* A l'envers mais on s'en fiche *);;


(*###############################################################*)
(*#                      WordMaker-output                       #*)
(*###############################################################*)


(*###############################################################*)
(*#                           Counters                          #*)
(*###############################################################*)

module CountMap = Map.Make(String);;
let motsToCountmap mots = 
	let rec ajout mots dict = match mots with
		| [] -> dict
		|  e::s -> let newDict = if CountMap.mem e dict
					then CountMap.add e ((CountMap.find e dict)+1) dict
					else CountMap.add e 1 dict in
				ajout s newDict
	in ajout mots CountMap.empty;;
	
let countmapToMotsPList cmap = CountMap.fold (fun k v l -> (k,v)::l) cmap [];;


(*###############################################################*)
(*#                          Encodeurs                          #*)
(*###############################################################*)

module Dict = Map.Make(String);;

let arbreToDict abre = 
	let rec ajout prefixe dictionnaire abre = match abre with
		| Feuille(s,i) -> Dict.add s prefixe dictionnaire
		| Noeud(i,a,b) -> let prefixGauche = concat prefixe (1,[1n]) and
									prefixDroit  = concat prefixe (1,[0n]) in
				let dic1 = ajout prefixGauche dictionnaire a in
				ajout prefixDroit  dic1 b
	in ajout (0,[0n]) Dict.empty abre;;

let dumpDict dict = Dict.iter (fun k v ->
	printf "\"%s\" -> %s\n" k (bitstringToString v)) dict;;

(* Renvoie une taille du dictionnaire en stockage *)
let dictSize dict = Dict.fold (fun str bs v -> v+(String.length str)*8+8+(bitSize bs)+8) dict 0;;


let encodeMots mots dict  = 
	let rec aux reste deja dict = match reste with
		| [] -> deja
		| e::r -> aux r (concat deja (Dict.find e dict)) dict in
	aux mots (0,[0n]) dict;;


(*###############################################################*)
(*#                          Décodeurs                          #*)
(*###############################################################*)

type bstate = {mutable majed:bool ; mutable fullmaj:bool ; mutable nospace:bool};;

let gstr state mot =
	let start = if state.nospace then "" else " " in
	if state.fullmaj
	then (start^(String.map getMajChar mot))
	else
		if state.majed
		then 
			match (String.length mot) with
				| 0 -> start
				| 1 -> (start^(getMaj mot.[0]))
				| n -> (start^(String.make 1 (getMajChar mot.[0]))^(String.sub mot 1 (n-1)))
		else start^mot;;
let resetstate buffer = 
	buffer.fullmaj <- false;
	buffer.majed <- false;
	buffer.nospace <- false;;

(* A changer pour les mots spéciaux*)
let addMotToBuffer mot buffer = add_string buffer mot;add_string buffer " ";;

let addMotToBuffer mot buffer state = 
	match mot with
		| l when l=(ctrl "NOSPACE") -> state.nospace <- true
		| l when l=(ctrl "MAJED") -> state.majed <- true
		| l when l=(ctrl "FULLMAJ") -> state.fullmaj <- true
		| l when l=(ctrl "NEWLINE") -> add_string buffer (gstr state "\n");resetstate state;state.nospace <- true
		|  _ -> add_string buffer (gstr state mot);resetstate state;;

let rec binarintReader arbre0 abre buffer state n i =
	match abre with
		| Feuille(m,p) -> 
			addMotToBuffer m buffer state;print_endline "f";
			binarintReader arbre0 arbre0 buffer state n i 
		| Noeud(p,a,b) -> print_endline (string_of_int i);
			if i>=isize 
			then abre(* On est au bout du binarint, on peut renvoyer l'état actuel*)
			else 
				match (ithBit n i) with
					| Zero -> binarintReader arbre0 b buffer state n (i+1)
					| Un   -> binarintReader arbre0 a buffer state n (i+1) ;;


let rec reader arbre0 toReadL buffer state=
	match toReadL with
		| [] -> arbre0 (* C'est vide, rien à ajouter*)
		| next::reste -> let abre = reader arbre0 reste buffer state in(* On lit d'abord les binarints suivants *)
			binarintReader arbre0 abre buffer state next 0;;

exception DecodeException;;

let decodeBitstring arbre bs = let buffer = Buffer.create 100 (*TODO mettre la capacité initiale en paramètre et essayer de la "déduire" de la taille du fichier*)
	and state = {majed=false;fullmaj=false;nospace=true} in
	match bs with
		| (i,[]) -> ""
		| (i,last::toRead) -> let abre = reader arbre toRead buffer state in
			let abreFin = binarintReader arbre abre buffer state (last >>> (isize-i)) (isize-i) in
			if abreFin != arbre then raise (DecodeException)
			else let out = Buffer.contents buffer in Buffer.reset buffer; out;;


(*###############################################################*)
(*#                      Ligne de commande                       #*)
(*###############################################################*)

let args = Sys.argv;;
match Array.length args with
	| 1 ->
		print_endline "Coucou ! "
	| 2 ->
		let livreFilename = args.(1) in
		let mots = List.rev (getMotsFromFile livreFilename) in
		let countmap = motsToCountmap mots in
		let motpList = countmapToMotsPList countmap in
		let harbre = construireArbre motpList in
		let dict = arbreToDict harbre in
		let encode = encodeMots mots dict in
		let s = bitSize encode and fs = filesize livreFilename and ds = dictSize dict in
			printf "%i bits, soit %i octets dont %i pour le dictionnaire sur un fichier de %i octets (%f %% de taux de compression)\n"
	 		(s+ds) ((s+ds)/8) (ds/8) fs (100. *. ((float_of_int (s+ds))/. 8. /. (float_of_int fs)))
	| 3 ->
		let livreFilename = args.(1) in
		(*let outName = args.(2) in*)
		let mots = List.rev (getMotsFromFile livreFilename) in
		let countmap = motsToCountmap mots in
		let motpList = countmapToMotsPList countmap in
		let harbre = construireArbre motpList in
		let dict = arbreToDict harbre in
		let encode = encodeMots mots dict in
		let s = bitSize encode and fs = filesize livreFilename and ds = dictSize dict in
			printf "%i bits, soit %i octets dont %i pour le dictionnaire sur un fichier de %i octets (%f %% de taux de compression)\n"
	 		(s+ds) ((s+ds)/8) (ds/8) fs (100. *. ((float_of_int (s+ds))/. 8. /. (float_of_int fs)));
		let decode = decodeBitstring harbre encode in
		print_endline (decode);
		print_endline "La, je suis censé écrire dans un fichier ?"
	| _ ->
		print_endline "Ça fait beaucoup d'arguments ?";;