TIPE2021/TIPE2021.ml
2020-11-12 17:54:22 +01:00

411 lines
13 KiB
OCaml

open Printf;;
open Str;;
open Nativeint;;
open Buffer;;
#load "str.cma";;
(*###############################################################*)
(*# Binarint #*)
(*###############################################################*)
(* On utilise du little endian *)
(* Le MSB est écrit en dernier (le plus à droite) *)
(* Puisque les listes en caml sont déjà l-endian *)
type binarint = nativeint;;
type bit = Zero | Un;;
let isize = Sys.int_size+1;;
(* lsl -> vers MSB *)
(* lsr -> vers LSB *)
let (<<<) = fun a b -> shift_right a b;;
let (>>>) = fun a b -> shift_left a b;;
let ithBit (n:binarint) i = if (rem (n <<< i) 2n=0n) then Zero else Un;;
let binarintToString (n:binarint) = let s = String.make isize '-' in
for i=0 to isize-1 do
Bytes.set s i (if (rem (n <<< i) 2n=0n) then '0' else '1')
done;
s;;
let parseBinarint (str:string) = let l = String.length str in
if l > isize then raise (Invalid_argument("La chaine de caractères spécifiée est trop longue pour etre celle d'un binarint"))
else let s = ref 0n and p = ref 1n in
for i=0 to l-1 do
s := add !s (if str.[i]='1' then !p else 0n);
p := mul 2n (!p)
done;
!s;;
(*************** Filtres ***************)
let makeFilters () = let lsf = Array.make (isize+1) 0n and msf = Array.make (isize+1) 0n in
for i=1 to isize do
lsf.(i) <- add ((lsf.(i-1)) >>> 1) 1n
done;
for i=1 to isize do
msf.(isize-i) <- lognot lsf.(i)
done;
(lsf,msf);;
let lsf,msf = makeFilters();;
let (<||) = fun n i -> (logand lsf.(i) n);;
let (>||) = fun n i -> (logand msf.(i) n);;
(*###############################################################*)
(*# Bitstring #*)
(*###############################################################*)
type bitstring = int * binarint list;;
let bitstringToString (bs:bitstring) = let i,l = bs in match l with
| [] -> ""
| last::l0 -> (List.fold_left (fun deja toPrint -> deja^(binarintToString toPrint)) "" l0)
^(String.sub (binarintToString last) 0 i);;
let bitSize bs = match bs with
| (i,[]) -> 0
| (i,e::s) -> (List.length s)*isize + i;;
let rec ajoutPartial (lsbs:bitstring) (el:binarint) (eli:int) :bitstring=
let (lsbsi,lsbsl)=lsbs in
match lsbsl with
| [] -> (eli,[el])
| lsbse::lsbs0 ->
let returni = ((lsbsi + eli-1) mod isize) +1 in
let returnl =
let intersec = logor (lsbse <|| lsbsi) ((el <|| (min eli (isize-lsbsi))) >>> lsbsi) in
(if (lsbsi+eli) > isize
then ((el<<<(isize-lsbsi) <|| (eli+lsbsi-isize))::intersec::lsbs0)
else (intersec::lsbs0))
in (returni,returnl);;
let rec ajoutFull (lsbs:bitstring) (el:binarint):bitstring =
let (lsbsi,lsbsl)=lsbs in
match lsbsl with
| [] -> (isize,[el])
| lsbsle::lsbsl0 ->
let reste = ((el <<< (isize-lsbsi)) <|| lsbsi) in
let corps = (logor ((el >>> (isize-lsbsi)) >|| (isize-lsbsi)) (lsbsle <|| lsbsi)) in
(lsbsi,reste::corps::lsbsl0);;
let concat (lsbs:bitstring) (msbs:bitstring) = (* Efficace si msbs est court *)
let (msbsi,msbsl) = msbs in
match msbsl with
| [] -> lsbs (* On ne rajoute rien *)
| msbsle::msbsl0 ->
let fullAdded = List.fold_left ajoutFull lsbs msbsl0 in
ajoutPartial fullAdded msbsle msbsi;;
(*###############################################################*)
(*# Huffmann #*)
(*###############################################################*)
type motPondere = string * int;;
(* Un huffarbre n'est valide que si *)
(* 1 - Pour tout Noeud(p,a,b), p=poids(a)+poids(b) *)
(* 2 - Pour tout Noeud(p,a,b), poids(a)<=poids(b) *)
type huffarbre = Feuille of motPondere | Noeud of (int * huffarbre * huffarbre);;
(* On suppose que tous les huffarbres rencontrés seront valides *)
(* Il suffit ensuite de vérifier que les fonctions préservent la validité *)
let poids arb = match arb with
| Feuille((s,i)) -> i
| Noeud(i,a,b) -> i;;
(* Trie le premier élément d'une liste avec f fonction de comparaison*)
let rec triPremierListe f lst =
match lst with
| [e] -> [e]
| [] -> []
| e1::e2::s ->if (f e1)>(f e2)
then e2::(triPremierListe f (e1::s))
else lst;;
(* Renvoie le triplet ordonné (pour l'ordre des poids des noeuds) de l'ensemble {a,b,c}*)
let sortThreeHuffarbres (a,b,c) = let i=poids a and j = poids b and k = poids c in
if i>=j && i>=k then (b,c,a) else
if j>=i && j>=k then (a,c,b) else
(*if k>=j and k>=i then*) (a,b,c);;
(* Fusionne deux noeuds *)
let rec mergeNodes n1 n2 = if (poids n1)>(poids n2) then mergeNodes n2 n1 else
let masseTotale = (poids n1) + (poids n2) in (* Conservation de la masse*)
match (n1,n2) with
| Feuille((s1,i1)),Feuille((s2,i2)) -> Noeud(masseTotale,n1,n2)
| Noeud(p,a,b),Feuille(m) -> Noeud(masseTotale,mergeNodes a b,n2) (*les deux plus lourds sont nécessairement b et m*)
| Feuille(m),Noeud(p,a,b) ->
(* On a trois éléments à transformer en deux (a,b,n1).
On fusionne les deux de poids le plus faible puis on les range tel que l'ordre soit préservé*)
let (ap,bp,cp) = (sortThreeHuffarbres (a,b,n1)) in (* On a cp qui est le plus lourd *)
let nouveau = (mergeNodes ap bp) in
if (poids nouveau)>=(poids cp) then Noeud(masseTotale,cp,nouveau)
else Noeud(masseTotale,nouveau,cp)
| Noeud(p1,a1,b1),Noeud(p2,a2,b2) -> Noeud(masseTotale,n1,n2);;(* On a nécessairement p1>p2 *)
let orderedMerge n1 n2 = if (poids n1)>=(poids n2) then mergeNodes n2 n1 else mergeNodes n1 n2;;
let motMerger n m = orderedMerge n (Feuille(m));;
exception EmptyException;;
let intcompare i j = if i=j then 0 else if i<j then -1 else 1;;
let compareMots (l1:motPondere) (l2:motPondere) = let (s1,i1)=l1 and (s2,i2)=l2 in intcompare i1 i2;;
let construireArbre mots = let motsTrie = List.sort compareMots mots in
match motsTrie with
| [] -> raise EmptyException
| e::s -> List.fold_left motMerger (Feuille(e)) s;;
(*###############################################################*)
(*# Fichiers #*)
(*###############################################################*)
let read_file filename =
let lines = ref [] and chan = open_in filename in
try
while true; do
lines := input_line chan :: !lines
done; !lines
with End_of_file ->
close_in chan;
List.rev !lines ;;
let filesize filename = let s = ref 0 and chan = open_in filename in
try
while true; do
s := !s+(String.length (input_line chan))
done; !s
with End_of_file -> close_in chan;
!s ;;
(*###############################################################*)
(*# WordMaker-input #*)
(*###############################################################*)
let getMotsFromLine = Str.split (Str.regexp " ");;
let getMotsFromFile filename =
let mots = ref [] and chan = open_in filename in
try
while true; do
mots := (getMotsFromLine (input_line chan)) @ !mots
done; !mots
with End_of_file ->
close_in chan;
!mots (* A l'envers mais on s'en fiche *);;
(*###############################################################*)
(*# WordMaker-output #*)
(*###############################################################*)
(*###############################################################*)
(*# Counters #*)
(*###############################################################*)
module CountMap = Map.Make(String);;
let motsToCountmap mots =
let rec ajout mots dict = match mots with
| [] -> dict
| e::s -> let newDict = if CountMap.mem e dict
then CountMap.add e ((CountMap.find e dict)+1) dict
else CountMap.add e 1 dict in
ajout s newDict
in ajout mots CountMap.empty;;
let countmapToMotsPList cmap = CountMap.fold (fun k v l -> (k,v)::l) cmap [];;
(*###############################################################*)
(*# Encodeurs #*)
(*###############################################################*)
module Dict = Map.Make(String);;
let arbreToDict abre =
let rec ajout prefixe dictionnaire abre = match abre with
| Feuille(s,i) -> Dict.add s prefixe dictionnaire
| Noeud(i,a,b) -> let prefixGauche = concat prefixe (1,[1n]) and
prefixDroit = concat prefixe (1,[0n]) in
let dic1 = ajout prefixGauche dictionnaire a in
ajout prefixDroit dic1 b
in ajout (0,[0n]) Dict.empty abre;;
let dumpDict dict = Dict.iter (fun k v ->
printf "\"%s\" -> %s\n" k (bitstringToString v)) dict;;
let dictSize dict = Dict.fold (fun str bs v -> v+(String.length str)*8+8+(bitSize bs)+8) dict 0;;
let encodeMots mots dict =
let rec aux reste deja dict = match reste with
| [] -> deja
| e::r -> aux r (concat deja (Dict.find e dict)) dict in
aux mots (0,[0n]) dict;;
(*###############################################################*)
(*# Décodeurs #*)
(*###############################################################*)
(* A changer pour les mots spéciaux*)
let addMotToBuffer mot buffer = add_string buffer mot;add_string buffer " ";;
let rec binarintReader arbre0 abre buffer n i =
match abre with
| Feuille(m,p) ->
addMotToBuffer m buffer;print_endline "f";
binarintReader arbre0 arbre0 buffer n i
| Noeud(p,a,b) -> print_endline (string_of_int i);
if i>=isize
then abre(* On est au bout du binarint, on peut renvoyer l'état actuel*)
else
match (ithBit n i) with
| Zero -> binarintReader arbre0 b buffer n (i+1)
| Un -> binarintReader arbre0 a buffer n (i+1) ;;
let rec reader arbre0 toReadL buffer=
print_endline (string_of_int (Buffer.length buffer));
match toReadL with
| [] -> arbre0 (* C'est vide, rien à ajouter*)
| next::reste -> let abre = reader arbre0 reste buffer in(* On lit d'abord les binarints suivants *)
binarintReader arbre0 abre buffer next 0;;
exception DecodeException;;
let decodeBitstring arbre bs = let buffer = Buffer.create 100 in(*TODO mettre la capacité initiale en paramètre et essayer de la "déduire" de la taille du fichier*)
match bs with
| (i,[]) -> ""
| (i,last::toRead) -> let abre = reader arbre toRead buffer in
let abreFin = binarintReader arbre abre buffer (last >>> (isize-i)) (isize-i) in
if abreFin != arbre then raise (DecodeException)
else let out = Buffer.contents buffer in Buffer.reset buffer; out;;
;;
(*###############################################################*)
(*# Procédure #*)
(*###############################################################*)
let livreFilename = "/home/mysaa/Documents/HistoireDesNombresEtDeLaNumerationNumerique.txt";;
let mots = getMotsFromFile livreFilename;;
let countmap = motsToCountmap mots;;
CountMap.find "un" countmap;;
let motpList = countmapToMotsPList countmap;;
List.rev (List.sort compareMots motpList);;
let harbre = construireArbre motpList;;
let dict = arbreToDict harbre;;
dumpDict dict;;
let encode = encodeMots mots dict;;
print_string (bitstringToString encode);;
print_endline (string_of_int (bitSize encode));;
let s = bitSize encode and fs = filesize livreFilename and ds = dictSize dict in
printf "%i bits, soit %i octets dont %i pour le dictionnaire sur un fichier de %i octets (%f %% de taux de compression)"
(s+ds) ((s+ds)/8) (ds/8) fs (100. *. ((float_of_int (s+ds))/. 8. /. (float_of_int fs)));;
let decode = decodeBitstring harbre encode;;
print_endline (decode);;
(*###############################################################*)
(*# Tests #*)
(*###############################################################*)
let x = ref (5,[13L]);;
let x0 = !x;;
let zer = (0,[]);;
#untrace ajout;;
for i=1 to 200 do
x := concat x0 !x;
print_endline (string_of_int i);
let i,l = !x in List.iter (function i6 -> print_endline (printInt64 i6)) l
done;;
printInt64 leftFilters.(32);;
printInt64 13L;;
printInt64 3722304989L;;
parseBinarint "010101";;
bitstringToString (12,[42;42]);;
for i=0 to 65 do
print_endline (binarintToString msf.(i))
done;;
(* Test de rapidité *)
let t0=Sys.time () in
let a = of_int 6745678876567878 and b = 12 in
for i=0 to 500_000_000 do
shift_right a b
done;
print_endline (string_of_float((Sys.time ())-. t0));
let t0=Sys.time () in
let a = 79066342487687576 and b = 12 in
for i=0 to 500_000_000 do
a lsr b
done;
print_endline (string_of_float ((Sys.time ())-. t0));;
(* Résultats: int reste plus rapide *)
let x = ref (6,[42n]);;
let x0 = !x;;
print_endline (bitstringToString x0);;
for i=0 to 60 do
x := concat !x x0;
print_endline (bitstringToString !x)
done;;
let testListe = [("a",35);("b",10);("c",19);("d",25);("e",06);("f",05)];;
let out = List.fold_left motMerger (Feuille(("de",3))) testListe;;
construireArbre testListe;;