From d028d4ca46e0e722e6d9fda2dceb01f2df22c957 Mon Sep 17 00:00:00 2001 From: MysaaJava Date: Thu, 12 Nov 2020 17:54:22 +0100 Subject: [PATCH] Le premier fichier --- TIPE2021.ml | 411 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 411 insertions(+) create mode 100644 TIPE2021.ml diff --git a/TIPE2021.ml b/TIPE2021.ml new file mode 100644 index 0000000..fecbce3 --- /dev/null +++ b/TIPE2021.ml @@ -0,0 +1,411 @@ +open Printf;; +open Str;; +open Nativeint;; +open Buffer;; +#load "str.cma";; + +(*###############################################################*) +(*# Binarint #*) +(*###############################################################*) + +(* On utilise du little endian *) +(* Le MSB est écrit en dernier (le plus à droite) *) +(* Puisque les listes en caml sont déjà l-endian *) +type binarint = nativeint;; +type bit = Zero | Un;; +let isize = Sys.int_size+1;; + +(* lsl -> vers MSB *) +(* lsr -> vers LSB *) + + +let (<<<) = fun a b -> shift_right a b;; + +let (>>>) = fun a b -> shift_left a b;; + +let ithBit (n:binarint) i = if (rem (n <<< i) 2n=0n) then Zero else Un;; + +let binarintToString (n:binarint) = let s = String.make isize '-' in + for i=0 to isize-1 do + Bytes.set s i (if (rem (n <<< i) 2n=0n) then '0' else '1') + done; + s;; + + +let parseBinarint (str:string) = let l = String.length str in + if l > isize then raise (Invalid_argument("La chaine de caractères spécifiée est trop longue pour etre celle d'un binarint")) + else let s = ref 0n and p = ref 1n in + for i=0 to l-1 do + s := add !s (if str.[i]='1' then !p else 0n); + p := mul 2n (!p) + done; + !s;; + +(*************** Filtres ***************) +let makeFilters () = let lsf = Array.make (isize+1) 0n and msf = Array.make (isize+1) 0n in + for i=1 to isize do + lsf.(i) <- add ((lsf.(i-1)) >>> 1) 1n + done; + for i=1 to isize do + msf.(isize-i) <- lognot lsf.(i) + done; + (lsf,msf);; +let lsf,msf = makeFilters();; +let (<||) = fun n i -> (logand lsf.(i) n);; +let (>||) = fun n i -> (logand msf.(i) n);; + + + +(*###############################################################*) +(*# Bitstring #*) +(*###############################################################*) + +type bitstring = int * binarint list;; + + +let bitstringToString (bs:bitstring) = let i,l = bs in match l with + | [] -> "" + | last::l0 -> (List.fold_left (fun deja toPrint -> deja^(binarintToString toPrint)) "" l0) + ^(String.sub (binarintToString last) 0 i);; + +let bitSize bs = match bs with + | (i,[]) -> 0 + | (i,e::s) -> (List.length s)*isize + i;; + + +let rec ajoutPartial (lsbs:bitstring) (el:binarint) (eli:int) :bitstring= + let (lsbsi,lsbsl)=lsbs in + match lsbsl with + | [] -> (eli,[el]) + | lsbse::lsbs0 -> + let returni = ((lsbsi + eli-1) mod isize) +1 in + let returnl = + let intersec = logor (lsbse <|| lsbsi) ((el <|| (min eli (isize-lsbsi))) >>> lsbsi) in + (if (lsbsi+eli) > isize + then ((el<<<(isize-lsbsi) <|| (eli+lsbsi-isize))::intersec::lsbs0) + else (intersec::lsbs0)) + in (returni,returnl);; + +let rec ajoutFull (lsbs:bitstring) (el:binarint):bitstring = + let (lsbsi,lsbsl)=lsbs in + match lsbsl with + | [] -> (isize,[el]) + | lsbsle::lsbsl0 -> + let reste = ((el <<< (isize-lsbsi)) <|| lsbsi) in + let corps = (logor ((el >>> (isize-lsbsi)) >|| (isize-lsbsi)) (lsbsle <|| lsbsi)) in + (lsbsi,reste::corps::lsbsl0);; +let concat (lsbs:bitstring) (msbs:bitstring) = (* Efficace si msbs est court *) + let (msbsi,msbsl) = msbs in + match msbsl with + | [] -> lsbs (* On ne rajoute rien *) + | msbsle::msbsl0 -> + let fullAdded = List.fold_left ajoutFull lsbs msbsl0 in + ajoutPartial fullAdded msbsle msbsi;; + + + +(*###############################################################*) +(*# Huffmann #*) +(*###############################################################*) + +type motPondere = string * int;; + +(* Un huffarbre n'est valide que si *) +(* 1 - Pour tout Noeud(p,a,b), p=poids(a)+poids(b) *) +(* 2 - Pour tout Noeud(p,a,b), poids(a)<=poids(b) *) +type huffarbre = Feuille of motPondere | Noeud of (int * huffarbre * huffarbre);; + +(* On suppose que tous les huffarbres rencontrés seront valides *) +(* Il suffit ensuite de vérifier que les fonctions préservent la validité *) + +let poids arb = match arb with + | Feuille((s,i)) -> i + | Noeud(i,a,b) -> i;; + +(* Trie le premier élément d'une liste avec f fonction de comparaison*) +let rec triPremierListe f lst = + match lst with + | [e] -> [e] + | [] -> [] + | e1::e2::s ->if (f e1)>(f e2) + then e2::(triPremierListe f (e1::s)) + else lst;; + +(* Renvoie le triplet ordonné (pour l'ordre des poids des noeuds) de l'ensemble {a,b,c}*) +let sortThreeHuffarbres (a,b,c) = let i=poids a and j = poids b and k = poids c in + if i>=j && i>=k then (b,c,a) else + if j>=i && j>=k then (a,c,b) else + (*if k>=j and k>=i then*) (a,b,c);; + + + + +(* Fusionne deux noeuds *) +let rec mergeNodes n1 n2 = if (poids n1)>(poids n2) then mergeNodes n2 n1 else +let masseTotale = (poids n1) + (poids n2) in (* Conservation de la masse*) + match (n1,n2) with + | Feuille((s1,i1)),Feuille((s2,i2)) -> Noeud(masseTotale,n1,n2) + | Noeud(p,a,b),Feuille(m) -> Noeud(masseTotale,mergeNodes a b,n2) (*les deux plus lourds sont nécessairement b et m*) + | Feuille(m),Noeud(p,a,b) -> + (* On a trois éléments à transformer en deux (a,b,n1). + On fusionne les deux de poids le plus faible puis on les range tel que l'ordre soit préservé*) + let (ap,bp,cp) = (sortThreeHuffarbres (a,b,n1)) in (* On a cp qui est le plus lourd *) + let nouveau = (mergeNodes ap bp) in + if (poids nouveau)>=(poids cp) then Noeud(masseTotale,cp,nouveau) + else Noeud(masseTotale,nouveau,cp) + | Noeud(p1,a1,b1),Noeud(p2,a2,b2) -> Noeud(masseTotale,n1,n2);;(* On a nécessairement p1>p2 *) + + +let orderedMerge n1 n2 = if (poids n1)>=(poids n2) then mergeNodes n2 n1 else mergeNodes n1 n2;; +let motMerger n m = orderedMerge n (Feuille(m));; + +exception EmptyException;; +let intcompare i j = if i=j then 0 else if i raise EmptyException + | e::s -> List.fold_left motMerger (Feuille(e)) s;; + + + +(*###############################################################*) +(*# Fichiers #*) +(*###############################################################*) + +let read_file filename = + let lines = ref [] and chan = open_in filename in + try + while true; do + lines := input_line chan :: !lines + done; !lines + with End_of_file -> + close_in chan; + List.rev !lines ;; + +let filesize filename = let s = ref 0 and chan = open_in filename in + try + while true; do + s := !s+(String.length (input_line chan)) + done; !s + with End_of_file -> close_in chan; + !s ;; + +(*###############################################################*) +(*# WordMaker-input #*) +(*###############################################################*) + +let getMotsFromLine = Str.split (Str.regexp " ");; + +let getMotsFromFile filename = + let mots = ref [] and chan = open_in filename in + try + while true; do + mots := (getMotsFromLine (input_line chan)) @ !mots + done; !mots + with End_of_file -> + close_in chan; + !mots (* A l'envers mais on s'en fiche *);; + + + + +(*###############################################################*) +(*# WordMaker-output #*) +(*###############################################################*) + + + + +(*###############################################################*) +(*# Counters #*) +(*###############################################################*) + +module CountMap = Map.Make(String);; +let motsToCountmap mots = + let rec ajout mots dict = match mots with + | [] -> dict + | e::s -> let newDict = if CountMap.mem e dict + then CountMap.add e ((CountMap.find e dict)+1) dict + else CountMap.add e 1 dict in + ajout s newDict + in ajout mots CountMap.empty;; + +let countmapToMotsPList cmap = CountMap.fold (fun k v l -> (k,v)::l) cmap [];; + + +(*###############################################################*) +(*# Encodeurs #*) +(*###############################################################*) + +module Dict = Map.Make(String);; + +let arbreToDict abre = + let rec ajout prefixe dictionnaire abre = match abre with + | Feuille(s,i) -> Dict.add s prefixe dictionnaire + | Noeud(i,a,b) -> let prefixGauche = concat prefixe (1,[1n]) and + prefixDroit = concat prefixe (1,[0n]) in + let dic1 = ajout prefixGauche dictionnaire a in + ajout prefixDroit dic1 b + in ajout (0,[0n]) Dict.empty abre;; + +let dumpDict dict = Dict.iter (fun k v -> + printf "\"%s\" -> %s\n" k (bitstringToString v)) dict;; +let dictSize dict = Dict.fold (fun str bs v -> v+(String.length str)*8+8+(bitSize bs)+8) dict 0;; + + +let encodeMots mots dict = + let rec aux reste deja dict = match reste with + | [] -> deja + | e::r -> aux r (concat deja (Dict.find e dict)) dict in + aux mots (0,[0n]) dict;; + + +(*###############################################################*) +(*# Décodeurs #*) +(*###############################################################*) + + +(* A changer pour les mots spéciaux*) +let addMotToBuffer mot buffer = add_string buffer mot;add_string buffer " ";; + +let rec binarintReader arbre0 abre buffer n i = + match abre with + | Feuille(m,p) -> + addMotToBuffer m buffer;print_endline "f"; + binarintReader arbre0 arbre0 buffer n i + | Noeud(p,a,b) -> print_endline (string_of_int i); + if i>=isize + then abre(* On est au bout du binarint, on peut renvoyer l'état actuel*) + else + match (ithBit n i) with + | Zero -> binarintReader arbre0 b buffer n (i+1) + | Un -> binarintReader arbre0 a buffer n (i+1) ;; + + +let rec reader arbre0 toReadL buffer= + print_endline (string_of_int (Buffer.length buffer)); + match toReadL with + | [] -> arbre0 (* C'est vide, rien à ajouter*) + | next::reste -> let abre = reader arbre0 reste buffer in(* On lit d'abord les binarints suivants *) + binarintReader arbre0 abre buffer next 0;; + +exception DecodeException;; + +let decodeBitstring arbre bs = let buffer = Buffer.create 100 in(*TODO mettre la capacité initiale en paramètre et essayer de la "déduire" de la taille du fichier*) + match bs with + | (i,[]) -> "" + | (i,last::toRead) -> let abre = reader arbre toRead buffer in + let abreFin = binarintReader arbre abre buffer (last >>> (isize-i)) (isize-i) in + if abreFin != arbre then raise (DecodeException) + else let out = Buffer.contents buffer in Buffer.reset buffer; out;; +;; + + + +(*###############################################################*) +(*# Procédure #*) +(*###############################################################*) + +let livreFilename = "/home/mysaa/Documents/HistoireDesNombresEtDeLaNumerationNumerique.txt";; +let mots = getMotsFromFile livreFilename;; +let countmap = motsToCountmap mots;; +CountMap.find "un" countmap;; +let motpList = countmapToMotsPList countmap;; +List.rev (List.sort compareMots motpList);; +let harbre = construireArbre motpList;; +let dict = arbreToDict harbre;; +dumpDict dict;; +let encode = encodeMots mots dict;; +print_string (bitstringToString encode);; +print_endline (string_of_int (bitSize encode));; +let s = bitSize encode and fs = filesize livreFilename and ds = dictSize dict in + printf "%i bits, soit %i octets dont %i pour le dictionnaire sur un fichier de %i octets (%f %% de taux de compression)" + (s+ds) ((s+ds)/8) (ds/8) fs (100. *. ((float_of_int (s+ds))/. 8. /. (float_of_int fs)));; + +let decode = decodeBitstring harbre encode;; +print_endline (decode);; + + + + + + + + + + + + + + + + + + + + + + + + + + + +(*###############################################################*) +(*# Tests #*) +(*###############################################################*) + + + +let x = ref (5,[13L]);; +let x0 = !x;; +let zer = (0,[]);; +#untrace ajout;; +for i=1 to 200 do + x := concat x0 !x; + print_endline (string_of_int i); + let i,l = !x in List.iter (function i6 -> print_endline (printInt64 i6)) l +done;; +printInt64 leftFilters.(32);; +printInt64 13L;; + +printInt64 3722304989L;; + +parseBinarint "010101";; + +bitstringToString (12,[42;42]);; + +for i=0 to 65 do + print_endline (binarintToString msf.(i)) +done;; + +(* Test de rapidité *) +let t0=Sys.time () in +let a = of_int 6745678876567878 and b = 12 in + for i=0 to 500_000_000 do + shift_right a b + done; + print_endline (string_of_float((Sys.time ())-. t0)); +let t0=Sys.time () in +let a = 79066342487687576 and b = 12 in + for i=0 to 500_000_000 do + a lsr b + done; + print_endline (string_of_float ((Sys.time ())-. t0));; +(* Résultats: int reste plus rapide *) + + +let x = ref (6,[42n]);; +let x0 = !x;; +print_endline (bitstringToString x0);; +for i=0 to 60 do + x := concat !x x0; + print_endline (bitstringToString !x) +done;; + +let testListe = [("a",35);("b",10);("c",19);("d",25);("e",06);("f",05)];; +let out = List.fold_left motMerger (Feuille(("de",3))) testListe;; +construireArbre testListe;; \ No newline at end of file