744 lines
23 KiB
OCaml
744 lines
23 KiB
OCaml
open Printf;;
|
|
open Str;;
|
|
open Nativeint;;
|
|
open Buffer;;
|
|
#load "str.cma";;
|
|
|
|
(*###############################################################*)
|
|
(*# Binarint #*)
|
|
(*###############################################################*)
|
|
|
|
(* On utilise du little endian *)
|
|
(* Le MSB est écrit en dernier (le plus à droite) *)
|
|
(* Puisque les listes en caml sont déjà l-endian *)
|
|
type binarint = nativeint;;
|
|
type bit = Zero | Un;;
|
|
let isize = Sys.int_size+1;;
|
|
|
|
(* lsl -> vers MSB *)
|
|
(* lsr -> vers LSB *)
|
|
|
|
|
|
let (<<<) = fun a b -> shift_right a b;;
|
|
|
|
let (>>>) = fun a b -> shift_left a b;;
|
|
|
|
let ithBit (n:binarint) i = if (rem (n <<< i) 2n=0n) then Zero else Un;;
|
|
|
|
let binarintToString (n:binarint) = let s = String.make isize '-' in
|
|
for i=0 to isize-1 do
|
|
Bytes.set s i (if (rem (n <<< i) 2n=0n) then '0' else '1')
|
|
done;
|
|
s;;
|
|
|
|
|
|
let parseBinarint (str:string) = let l = String.length str in
|
|
if l > isize then raise (Invalid_argument("La chaine de caractères spécifiée est trop longue pour etre celle d'un binarint"))
|
|
else let s = ref 0n and p = ref 1n in
|
|
for i=0 to l-1 do
|
|
s := add !s (if str.[i]='1' then !p else 0n);
|
|
p := mul 2n (!p)
|
|
done;
|
|
!s;;
|
|
|
|
(*************** Filtres ***************)
|
|
let makeFilters () = let lsf = Array.make (isize+1) 0n and msf = Array.make (isize+1) 0n in
|
|
for i=1 to isize do
|
|
lsf.(i) <- add ((lsf.(i-1)) >>> 1) 1n
|
|
done;
|
|
for i=1 to isize do
|
|
msf.(isize-i) <- lognot lsf.(i)
|
|
done;
|
|
(lsf,msf);;
|
|
let lsf,msf = makeFilters();;
|
|
let (<||) = fun n i -> (logand lsf.(i) n);;
|
|
let (>||) = fun n i -> (logand msf.(i) n);;
|
|
|
|
|
|
|
|
(*###############################################################*)
|
|
(*# Bitstring #*)
|
|
(*###############################################################*)
|
|
|
|
type bitstring = int * binarint list;;
|
|
|
|
|
|
let bitstringToString (bs:bitstring) = let i,l = bs in match l with
|
|
| [] -> ""
|
|
| last::l0 -> (List.fold_left (fun deja toPrint -> deja^(binarintToString toPrint)) "" l0)
|
|
^(String.sub (binarintToString last) 0 i);;
|
|
|
|
let bitSize bs = match bs with
|
|
| (i,[]) -> 0
|
|
| (i,e::s) -> (List.length s)*isize + i;;
|
|
|
|
|
|
let rec ajoutPartial (lsbs:bitstring) (el:binarint) (eli:int) :bitstring=
|
|
let (lsbsi,lsbsl)=lsbs in
|
|
match lsbsl with
|
|
| [] -> (eli,[el])
|
|
| lsbse::lsbs0 ->
|
|
let returni = ((lsbsi + eli-1) mod isize) +1 in
|
|
let returnl =
|
|
let intersec = logor (lsbse <|| lsbsi) ((el <|| (min eli (isize-lsbsi))) >>> lsbsi) in
|
|
(if (lsbsi+eli) > isize
|
|
then ((el<<<(isize-lsbsi) <|| (eli+lsbsi-isize))::intersec::lsbs0)
|
|
else (intersec::lsbs0))
|
|
in (returni,returnl);;
|
|
|
|
let rec ajoutFull (lsbs:bitstring) (el:binarint):bitstring =
|
|
let (lsbsi,lsbsl)=lsbs in
|
|
match lsbsl with
|
|
| [] -> (isize,[el])
|
|
| lsbsle::lsbsl0 ->
|
|
let reste = ((el <<< (isize-lsbsi)) <|| lsbsi) in
|
|
let corps = (logor ((el >>> (isize-lsbsi)) >|| (isize-lsbsi)) (lsbsle <|| lsbsi)) in
|
|
(lsbsi,reste::corps::lsbsl0);;
|
|
let concat (lsbs:bitstring) (msbs:bitstring) = (* Efficace si msbs est court *)
|
|
let (msbsi,msbsl) = msbs in
|
|
match msbsl with
|
|
| [] -> lsbs (* On ne rajoute rien *)
|
|
| msbsle::msbsl0 ->
|
|
let fullAdded = List.fold_left ajoutFull lsbs msbsl0 in
|
|
ajoutPartial fullAdded msbsle msbsi;;
|
|
|
|
|
|
|
|
(*###############################################################*)
|
|
(*# Huffmann #*)
|
|
(*###############################################################*)
|
|
|
|
type motPondere = string * int;;
|
|
|
|
(* Un huffarbre n'est valide que si *)
|
|
(* 1 - Pour tout Noeud(p,a,b), p=poids(a)+poids(b) *)
|
|
(* 2 - Pour tout Noeud(p,a,b), poids(a)<=poids(b) *)
|
|
type huffarbre = Feuille of motPondere | Noeud of (int * huffarbre * huffarbre);;
|
|
|
|
(* On suppose que tous les huffarbres rencontrés seront valides *)
|
|
(* Il suffit ensuite de vérifier que les fonctions préservent la validité *)
|
|
|
|
let poids arb = match arb with
|
|
| Feuille((s,i)) -> i
|
|
| Noeud(i,a,b) -> i;;
|
|
|
|
(* Trie le premier élément d'une liste avec f fonction de comparaison*)
|
|
let rec triPremierListe f lst =
|
|
match lst with
|
|
| [e] -> [e]
|
|
| [] -> []
|
|
| e1::e2::s ->if (f e1)>(f e2)
|
|
then e2::(triPremierListe f (e1::s))
|
|
else lst;;
|
|
|
|
(* Renvoie le triplet ordonné (pour l'ordre des poids des noeuds) de l'ensemble {a,b,c}*)
|
|
let sortThreeHuffarbres (a,b,c) = let i=poids a and j = poids b and k = poids c in
|
|
if i>=j && i>=k then (b,c,a) else
|
|
if j>=i && j>=k then (a,c,b) else
|
|
(*if k>=j and k>=i then*) (a,b,c);;
|
|
|
|
|
|
|
|
|
|
(* Fusionne deux noeuds *)
|
|
let rec mergeNodes n1 n2 = if (poids n1)>(poids n2) then mergeNodes n2 n1 else
|
|
let masseTotale = (poids n1) + (poids n2) in (* Conservation de la masse*)
|
|
match (n1,n2) with
|
|
| Feuille((s1,i1)),Feuille((s2,i2)) -> Noeud(masseTotale,n1,n2)
|
|
| Noeud(p,a,b),Feuille(m) -> Noeud(masseTotale,mergeNodes a b,n2) (*les deux plus lourds sont nécessairement b et m*)
|
|
| Feuille(m),Noeud(p,a,b) ->
|
|
(* On a trois éléments à transformer en deux (a,b,n1).
|
|
On fusionne les deux de poids le plus faible puis on les range tel que l'ordre soit préservé*)
|
|
let (ap,bp,cp) = (sortThreeHuffarbres (a,b,n1)) in (* On a cp qui est le plus lourd *)
|
|
let nouveau = (mergeNodes ap bp) in
|
|
if (poids nouveau)>=(poids cp) then Noeud(masseTotale,cp,nouveau)
|
|
else Noeud(masseTotale,nouveau,cp)
|
|
| Noeud(p1,a1,b1),Noeud(p2,a2,b2) -> Noeud(masseTotale,n1,n2);;(* On a nécessairement p1>p2 *)
|
|
|
|
|
|
let orderedMerge n1 n2 = if (poids n1)>=(poids n2) then mergeNodes n2 n1 else mergeNodes n1 n2;;
|
|
let motMerger n m = orderedMerge n (Feuille(m));;
|
|
|
|
exception EmptyException;;
|
|
let intcompare i j = if i=j then 0 else if i<j then -1 else 1;;
|
|
|
|
let compareMots (l1:motPondere) (l2:motPondere) = let (s1,i1)=l1 and (s2,i2)=l2 in intcompare i1 i2;;
|
|
|
|
let construireArbre mots = let motsTrie = List.sort compareMots mots in
|
|
match motsTrie with
|
|
| [] -> raise EmptyException
|
|
| e::s -> List.fold_left motMerger (Feuille(e)) s;;
|
|
|
|
|
|
|
|
(*###############################################################*)
|
|
(*# Fichiers #*)
|
|
(*###############################################################*)
|
|
|
|
let read_file filename =
|
|
let lines = ref [] and chan = open_in filename in
|
|
try
|
|
while true; do
|
|
lines := input_line chan :: !lines
|
|
done; !lines
|
|
with End_of_file ->
|
|
close_in chan;
|
|
List.rev !lines ;;
|
|
|
|
let filesize filename = let s = ref 0 and chan = open_in filename in
|
|
try
|
|
while true; do
|
|
s := !s+(String.length (input_line chan))
|
|
done; !s
|
|
with End_of_file -> close_in chan;
|
|
!s ;;
|
|
|
|
(*###############################################################*)
|
|
(*# WordMaker-input #*)
|
|
(*###############################################################*)
|
|
(*Fréquences de sinnogrammes: https://lingua.mtsu.edu/chinese-computing/statistics/char/list.php?Which=MO*)
|
|
|
|
let getMotsFromLine = Str.split (Str.regexp " ");;
|
|
(* Caractères qui, enlèvent l'espace à la fin d'un mot: ['- /\@&] exemple: chauve-souris*)
|
|
(* Caractères qui, enlèvent l'espace avant et en placent un après: [:.;,?!%] exemple: Quoi? Vraiment?*)
|
|
|
|
let ctrlchar = Char.chr 17 ;;
|
|
let ctrl str = (String.make 1 ctrlchar)^str;;
|
|
|
|
type schartype = SE|SB|SA|CW|SW|Maj|Min|Dig;;
|
|
let getCharType chr = let c=Char.code chr in
|
|
match c with
|
|
| c when List.mem c [39;45;47;92;64;38;95;124] -> SE (* '-/\@&_| *)
|
|
| c when List.mem c [40;91;123] -> SB (* ([{ *)
|
|
| c when List.mem c [58;59;46;44;33;63;37;36;41;93;125] -> SA (* :.;,?!%$)]} *)
|
|
| c when List.mem c [34;35;42;43;60;61;62;94;96;126;9;10] -> CW (* "#*+<=>^`~ <Tab> <Line Feed> *) (*"*)
|
|
| 32 -> SW (* <Space> *)
|
|
| c when 65 <=c && c<=90 -> Maj
|
|
| c when 97 <=c && c<=122 -> Min
|
|
| c when 48 <=c && c<=57 -> Dig
|
|
| _ -> CW;;
|
|
|
|
exception NotALetterException;;
|
|
let getMin chr = let ct = getCharType chr in
|
|
String.make 1 (
|
|
match ct with
|
|
| Min|Dig -> chr
|
|
| Maj -> Char.chr ((Char.code chr)+32)
|
|
| _ -> raise NotALetterException
|
|
);;
|
|
|
|
let getMaj chr = let ct = getCharType chr in
|
|
String.make 1 (
|
|
match ct with
|
|
| Min -> Char.chr ((Char.code chr)-32)
|
|
| _ -> chr
|
|
);;
|
|
let getMajChar chr = let ct = getCharType chr in
|
|
match ct with
|
|
| Min -> Char.chr ((Char.code chr)-32)
|
|
| _ -> chr;;
|
|
|
|
|
|
(***
|
|
(* Renvoie les caractères lc et rc de types respectifs lt et rt formatés pret au rajout *)
|
|
(* La nouvelle chaine concaténée est prete pour un nouveau mot.*)
|
|
let ajoutSchar ch lt lc rt rc =
|
|
match (lt,rt) with
|
|
| SE,_
|
|
;;
|
|
***)
|
|
|
|
(***
|
|
let getMotsFromLine str = let mots=ref []
|
|
and lmot = ref ""
|
|
and maj = ref false (* Tout est-il en majuscules *)
|
|
and majed = ref false (* La premiere lettre est-elle en majuscule *)
|
|
and inMot = ref false
|
|
and spaceBefore = ref false
|
|
and spaceRequested = ref false
|
|
and lc = nullchar
|
|
and i = ref 0 in
|
|
while !i < (String.length str) do
|
|
if !spaceRequested && getCharType.str[i] <> CW
|
|
then begin
|
|
(* Si on a demandé un espace et qu'il n'y en a pas *)
|
|
(* Du coup on écris le caractère avec NOSPACE avant de continuer au traitement normal. *)
|
|
(* Le cas ou la requete est satisfaite est dans le match ci-dessous *)
|
|
(* Ben je ne suis pas un espace, alors bon ? on force avec NOSPACE*)
|
|
inMot := false;
|
|
mots := (String.make 1 !lc)::(ctrl "NOSPACE")::!mots;
|
|
lmot := "";
|
|
spaceBefore := false;
|
|
spaceRequested := false (* Ben plus maintenant du coup *)
|
|
end;
|
|
|
|
match getCharType str.[i] in
|
|
| SW -> (* Un espace en fait *)
|
|
if !inMot
|
|
then
|
|
inMot := false;
|
|
mots := !lmot::!mots;
|
|
lmot := "";
|
|
spaceBefore := true
|
|
else
|
|
if !spaceRequested
|
|
then
|
|
mots := (String.make 1 !lc)::!mots;
|
|
spaceRequested := false
|
|
spaceBefore := true
|
|
inMot := false
|
|
else
|
|
###
|
|
| SE ->
|
|
if !inMot
|
|
then
|
|
lc := str.[i];
|
|
inMot := false
|
|
else
|
|
###
|
|
| SA ->
|
|
if !inMot
|
|
then
|
|
lc := str.[i];
|
|
inMot := false;
|
|
spaceRequested := true
|
|
else
|
|
###
|
|
| SB ->
|
|
if !inMot
|
|
then
|
|
###
|
|
else
|
|
if !spaceBefore
|
|
then
|
|
mots := (String.make 1 str.[i])::!mots;
|
|
spaceRequested := false;
|
|
spaceBefore := false;
|
|
inMot := false
|
|
else
|
|
###
|
|
| CW ->
|
|
if !inMot
|
|
then
|
|
(* Terminer le mot prématurement et rajouter le mot du caractère CW en espérant un espace *)
|
|
inMot := false;
|
|
mots := !lmot::(ctrl "NOSPACE")::(String.make 1 str.[i])::!mots;
|
|
lmot := "";
|
|
spaceBefore := false;
|
|
spaceRequested := true;
|
|
else
|
|
if !spaceBefore
|
|
then
|
|
(* OK pour l'instant, on prépare le caractère en espérant un espace*)
|
|
lc := str.[i];
|
|
inMot := false;
|
|
spaceRequested := true
|
|
else
|
|
(* D'après le if préléminaire, on a spaceRequested=false *)
|
|
(* On a un CW avec une chaine qui devait etre finie *)
|
|
###
|
|
| Maj,Min,Dig ->
|
|
if !inMot
|
|
then
|
|
spaceBefore := false;
|
|
lmot := !lmot^str.[i]
|
|
else
|
|
match !lc with
|
|
| SB | SE ->
|
|
mots := (String.make 1 !lc)::!mots;
|
|
lc := nullchar
|
|
spaceRequested := false
|
|
spaceBefore := false
|
|
inMot := true
|
|
| _ ->
|
|
###
|
|
inMot := true
|
|
|
|
;
|
|
i := i+1
|
|
done;
|
|
;;**)
|
|
|
|
let finDeMot lst lmot majed fullmaj nospace =
|
|
match (majed,fullmaj,nospace) with
|
|
| (true,_,true) -> (ctrl "NOSPACE")::lmot::(ctrl "FULLMAJ")::lst
|
|
| (true,_,false) -> lmot::(ctrl "MAJED")::lst
|
|
| (false,true,true) -> (ctrl "NOSPACE")::lmot::(ctrl "FULLMAJ")::lst
|
|
| (false,true,false) -> lmot::(ctrl "FULLMAJ")::lst
|
|
| (false,false,true) -> (ctrl "NOSPACE")::lmot::lst
|
|
| (false,false,false) -> lmot::lst ;;
|
|
|
|
let getMotsFromLine str = let mots=ref []
|
|
and lmot = ref ""
|
|
and fullmaj = ref false (* Tout est-il en majuscules *)
|
|
and majed = ref false (* La premiere lettre est-elle en majuscule *)
|
|
and dansmot = ref false (* As-t-on commencé un mot ? *) in
|
|
for i=0 to (String.length str-1) do
|
|
match (getCharType str.[i]) with
|
|
| Maj ->
|
|
if !dansmot (* Si on ne commence pas un mot *)
|
|
then
|
|
if !fullmaj (* Si il n'y a que des majs jusque là *)
|
|
then
|
|
begin
|
|
majed := false; (* Il n'y a pas QUE la première lettre qui est en maj *)
|
|
lmot := (!lmot)^(getMin str.[i]) (* On rajoute la lettre *)
|
|
end
|
|
else (* La précédente est une minuscule *)
|
|
begin
|
|
(* Donc on termine le mot et on met nospace *)
|
|
mots := finDeMot !mots !lmot !majed !fullmaj true;
|
|
(* Puis on commence un nouveau mot avec une majuscule *)
|
|
lmot := getMin str.[i];
|
|
fullmaj := true;
|
|
majed := true;
|
|
dansmot := true
|
|
end
|
|
else
|
|
begin
|
|
(* Ben on commence un mot avec une majuscule *)
|
|
lmot := getMin str.[i];
|
|
fullmaj := true;
|
|
majed := true;
|
|
dansmot := true
|
|
end
|
|
| Min ->
|
|
if !dansmot (* Si on ne commence pas un mot *)
|
|
then
|
|
begin
|
|
fullmaj := false; (* Il n'y a pas que des majuscules *)
|
|
lmot := !lmot^(getMin str.[i]) (* On rajoute la lettre *)
|
|
end
|
|
else
|
|
begin
|
|
(* On commence un mot avec une minuscule *)
|
|
lmot := getMin str.[i];
|
|
majed := false;
|
|
fullmaj := false;
|
|
dansmot := true
|
|
end
|
|
(* | Dig -> On verra plus tard *)
|
|
| SW ->
|
|
if !dansmot
|
|
then
|
|
begin
|
|
(* Ben on finit le mot avec un espace (normal quoi) *)
|
|
mots := finDeMot !mots !lmot !majed !fullmaj false;
|
|
dansmot := false;
|
|
lmot := ""
|
|
(* Et on passe, le caractère a été rajouté par l'absence de nospace *)
|
|
end
|
|
else
|
|
begin
|
|
(* On vire le précédent si c'était un NOSPACE, sinon, on rajoute un mot vide, sans NOSPACE*)
|
|
match (!mots) with
|
|
| e::l -> (* TODO #001 éviter de vider mots, préférer rajouter un booléen *)
|
|
if e=(ctrl "NOSPACE") then mots := l else mots := ""::!mots
|
|
| _ ->
|
|
mots := ""::!mots
|
|
end
|
|
| _ ->
|
|
if !dansmot
|
|
then
|
|
begin
|
|
(* On finit le mot, mais avec nospace, puisque le caractère est collé *)
|
|
mots := finDeMot !mots !lmot !majed !fullmaj true;
|
|
dansmot := false;
|
|
lmot := "";
|
|
(* Puis on rajoute le caractère, avec NOSPACE *)
|
|
mots := (ctrl "NOSPACE")::(String.make 1 str.[i])::!mots (* TODO #001 *)
|
|
end
|
|
else
|
|
begin
|
|
(* Idem, mais on a pas le mot à finir *)
|
|
mots := (ctrl "NOSPACE")::(String.make 1 str.[i])::!mots (* TODO #001 *)
|
|
end
|
|
done;
|
|
mots := finDeMot !mots !lmot !majed !fullmaj false;
|
|
|
|
!mots
|
|
;;
|
|
|
|
let getMotsFromFile filename =
|
|
let mots = ref [] and chan = open_in filename in
|
|
try
|
|
while true; do
|
|
mots := (ctrl "NEWLINE")::((getMotsFromLine (input_line chan)) @ !mots)
|
|
done; !mots
|
|
with End_of_file ->
|
|
close_in chan;
|
|
!mots (* A l'envers mais on s'en fiche *);;
|
|
|
|
let getBytesFromFile filename =
|
|
let bits = ref (Bytes.create 0) and chan = open_in filename in
|
|
try
|
|
while true; do
|
|
bits := Bytes.cat (Bytes.of_string (input_line chan)) !bits
|
|
done; !bits
|
|
with End_of_file ->
|
|
close_in chan;
|
|
!bits (* A l'envers mais on s'en fiche *);;
|
|
|
|
|
|
|
|
(*###############################################################*)
|
|
(*# WordMaker-output #*)
|
|
(*###############################################################*)
|
|
|
|
|
|
|
|
|
|
(*###############################################################*)
|
|
(*# Counters #*)
|
|
(*###############################################################*)
|
|
|
|
module CountMap = Map.Make(String);;
|
|
let motsToCountmap mots =
|
|
let rec ajout mots dict = match mots with
|
|
| [] -> dict
|
|
| e::s -> let newDict = if CountMap.mem e dict
|
|
then CountMap.add e ((CountMap.find e dict)+1) dict
|
|
else CountMap.add e 1 dict in
|
|
ajout s newDict
|
|
in ajout mots CountMap.empty;;
|
|
|
|
let countmapToMotsPList cmap = CountMap.fold (fun k v l -> (k,v)::l) cmap [];;
|
|
|
|
|
|
(*###############################################################*)
|
|
(*# Encodeurs #*)
|
|
(*###############################################################*)
|
|
|
|
module Dict = Map.Make(String);;
|
|
|
|
let arbreToDict abre =
|
|
let rec ajout prefixe dictionnaire abre = match abre with
|
|
| Feuille(s,i) -> Dict.add s prefixe dictionnaire
|
|
| Noeud(i,a,b) -> let prefixGauche = concat prefixe (1,[1n]) and
|
|
prefixDroit = concat prefixe (1,[0n]) in
|
|
let dic1 = ajout prefixGauche dictionnaire a in
|
|
ajout prefixDroit dic1 b
|
|
in ajout (0,[0n]) Dict.empty abre;;
|
|
|
|
let dumpDict dict = Dict.iter (fun k v ->
|
|
printf "\"%s\" -> %s\n" k (bitstringToString v)) dict;;
|
|
|
|
(* Renvoie une taille du dictionnaire en stockage *)
|
|
let dictSize dict = Dict.fold (fun str bs v -> v+(String.length str)*8+8+(bitSize bs)+8) dict 0;;
|
|
|
|
|
|
let encodeMots mots dict =
|
|
let rec aux reste deja dict = match reste with
|
|
| [] -> deja
|
|
| e::r -> aux r (concat deja (Dict.find e dict)) dict in
|
|
aux mots (0,[0n]) dict;;
|
|
|
|
|
|
(*###############################################################*)
|
|
(*# Décodeurs #*)
|
|
(*###############################################################*)
|
|
|
|
type bstate = {mutable majed:bool ; mutable fullmaj:bool ; mutable nospace:bool};;
|
|
|
|
let gstr state mot =
|
|
let start = if state.nospace then "" else " " in
|
|
if state.fullmaj
|
|
then (start^(String.map getMajChar mot))
|
|
else
|
|
if state.majed
|
|
then
|
|
match (String.length mot) with
|
|
| 0 -> start
|
|
| 1 -> (start^(getMaj mot.[0]))
|
|
| n -> (start^(String.make 1 (getMajChar mot.[0]))^(String.sub mot 1 (n-1)))
|
|
else start^mot;;
|
|
let resetstate buffer =
|
|
buffer.fullmaj <- false;
|
|
buffer.majed <- false;
|
|
buffer.nospace <- false;;
|
|
|
|
(* A changer pour les mots spéciaux*)
|
|
let addMotToBuffer mot buffer = add_string buffer mot;add_string buffer " ";;
|
|
|
|
let addMotToBuffer mot buffer state =
|
|
match mot with
|
|
| l when l=(ctrl "NOSPACE") -> state.nospace <- true
|
|
| l when l=(ctrl "MAJED") -> state.majed <- true
|
|
| l when l=(ctrl "FULLMAJ") -> state.fullmaj <- true
|
|
| l when l=(ctrl "NEWLINE") -> add_string buffer (gstr state "\n");resetstate state;state.nospace <- true
|
|
| _ -> add_string buffer (gstr state mot);resetstate state;;
|
|
|
|
let rec binarintReader arbre0 abre buffer state n i =
|
|
match abre with
|
|
| Feuille(m,p) ->
|
|
addMotToBuffer m buffer state;print_endline "f";
|
|
binarintReader arbre0 arbre0 buffer state n i
|
|
| Noeud(p,a,b) -> print_endline (string_of_int i);
|
|
if i>=isize
|
|
then abre(* On est au bout du binarint, on peut renvoyer l'état actuel*)
|
|
else
|
|
match (ithBit n i) with
|
|
| Zero -> binarintReader arbre0 b buffer state n (i+1)
|
|
| Un -> binarintReader arbre0 a buffer state n (i+1) ;;
|
|
|
|
|
|
let rec reader arbre0 toReadL buffer state=
|
|
match toReadL with
|
|
| [] -> arbre0 (* C'est vide, rien à ajouter*)
|
|
| next::reste -> let abre = reader arbre0 reste buffer state in(* On lit d'abord les binarints suivants *)
|
|
binarintReader arbre0 abre buffer state next 0;;
|
|
|
|
exception DecodeException;;
|
|
|
|
let decodeBitstring arbre bs = let buffer = Buffer.create 100 (*TODO mettre la capacité initiale en paramètre et essayer de la "déduire" de la taille du fichier*)
|
|
and state = {majed=false;fullmaj=false;nospace=true} in
|
|
match bs with
|
|
| (i,[]) -> ""
|
|
| (i,last::toRead) -> let abre = reader arbre toRead buffer state in
|
|
let abreFin = binarintReader arbre abre buffer state (last >>> (isize-i)) (isize-i) in
|
|
if abreFin != arbre then raise (DecodeException)
|
|
else let out = Buffer.contents buffer in Buffer.reset buffer; out;;
|
|
|
|
|
|
(*###############################################################*)
|
|
(*# Ligne de commande #*)
|
|
(*###############################################################*)
|
|
|
|
let args = Sys.argv;;
|
|
match Array.length args with
|
|
| 1 ->
|
|
print_endline "Coucou ! "
|
|
| 2 ->
|
|
let livreFilename = args.(1) in
|
|
let mots = List.rev (getMotsFromFile livreFilename) in
|
|
let countmap = motsToCountmap mots in
|
|
let motpList = countmapToMotsPList countmap in
|
|
let harbre = construireArbre motpList in
|
|
let dict = arbreToDict harbre in
|
|
let encode = encodeMots mots dict in
|
|
let s = bitSize encode and fs = filesize livreFilename and ds = dictSize dict in
|
|
printf "%i bits, soit %i octets dont %i pour le dictionnaire sur un fichier de %i octets (%f %% de taux de compression)"
|
|
(s+ds) ((s+ds)/8) (ds/8) fs (100. *. ((float_of_int (s+ds))/. 8. /. (float_of_int fs)))
|
|
| 3 ->
|
|
let livreFilename = args.(1) in
|
|
(*let outName = args.(2) in*)
|
|
let mots = List.rev (getMotsFromFile livreFilename) in
|
|
let countmap = motsToCountmap mots in
|
|
let motpList = countmapToMotsPList countmap in
|
|
let harbre = construireArbre motpList in
|
|
let dict = arbreToDict harbre in
|
|
let encode = encodeMots mots dict in
|
|
let s = bitSize encode and fs = filesize livreFilename and ds = dictSize dict in
|
|
printf "%i bits, soit %i octets dont %i pour le dictionnaire sur un fichier de %i octets (%f %% de taux de compression)"
|
|
(s+ds) ((s+ds)/8) (ds/8) fs (100. *. ((float_of_int (s+ds))/. 8. /. (float_of_int fs)));
|
|
let decode = decodeBitstring harbre encode in
|
|
print_endline (decode);
|
|
print_endline "La, je suis censé écrire dans un fichier ?"
|
|
| _ ->
|
|
print_endline "Ça fait beaucoup d'arguments ?";;
|
|
|
|
(*###############################################################*)
|
|
(*# Procédure #*)
|
|
(*###############################################################*)
|
|
|
|
let livreFilename = "/home/mysaa/Documents/Spigot/merged.java";;
|
|
let mots = List.rev (getMotsFromFile livreFilename);;
|
|
let countmap = motsToCountmap mots;;
|
|
CountMap.find "un" countmap;;
|
|
let motpList = countmapToMotsPList countmap;;
|
|
List.rev (List.sort compareMots motpList);;
|
|
let harbre = construireArbre motpList;;
|
|
let dict = arbreToDict harbre;;
|
|
dumpDict dict;;
|
|
let encode = encodeMots mots dict;;
|
|
print_string (bitstringToString encode);;
|
|
print_endline (string_of_int (bitSize encode));;
|
|
let s = bitSize encode and fs = filesize livreFilename and ds = dictSize dict in
|
|
printf "%i bits, soit %i octets dont %i pour le dictionnaire sur un fichier de %i octets (%f %% de taux de compression)"
|
|
(s+ds) ((s+ds)/8) (ds/8) fs (100. *. ((float_of_int (s+ds))/. 8. /. (float_of_int fs)));;
|
|
|
|
let decode = decodeBitstring harbre encode;;
|
|
print_endline (decode);;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(*###############################################################*)
|
|
(*# Tests #*)
|
|
(*###############################################################*)
|
|
|
|
|
|
|
|
let x = ref (5,[13L]);;
|
|
let x0 = !x;;
|
|
let zer = (0,[]);;
|
|
#untrace ajout;;
|
|
for i=1 to 200 do
|
|
x := concat x0 !x;
|
|
print_endline (string_of_int i);
|
|
let i,l = !x in List.iter (function i6 -> print_endline (printInt64 i6)) l
|
|
done;;
|
|
printInt64 leftFilters.(32);;
|
|
printInt64 13L;;
|
|
|
|
printInt64 3722304989L;;
|
|
|
|
parseBinarint "010101";;
|
|
|
|
bitstringToString (12,[42;42]);;
|
|
|
|
for i=0 to 65 do
|
|
print_endline (binarintToString msf.(i))
|
|
done;;
|
|
|
|
(* Test de rapidité *)
|
|
let t0=Sys.time () in
|
|
let a = of_int 6745678876567878 and b = 12 in
|
|
for i=0 to 500_000 do
|
|
shift_right a b
|
|
done;
|
|
print_endline (string_of_float((Sys.time ())-. t0));
|
|
let t0=Sys.time () in
|
|
let a = 79066342487687576 and b = 12 in
|
|
for i=0 to 500_000 do
|
|
a lsr b
|
|
done;
|
|
print_endline (string_of_float ((Sys.time ())-. t0));;
|
|
(* Résultats: int reste plus rapide *)
|
|
|
|
|
|
let x = ref (6,[42n]);;
|
|
let x0 = !x;;
|
|
print_endline (bitstringToString x0);;
|
|
for i=0 to 60 do
|
|
x := concat !x x0;
|
|
print_endline (bitstringToString !x)
|
|
done;;
|
|
|
|
let testListe = [("a",35);("b",10);("c",19);("d",25);("e",06);("f",05)];;
|
|
let out = List.fold_left motMerger (Feuille(("de",3))) testListe;;
|
|
construireArbre testListe;;
|