From 74a2e58cbc88c675982eef6a5be03139e519af82 Mon Sep 17 00:00:00 2001
From: Mysaa
Date: Mon, 24 May 2021 14:35:08 +0200
Subject: [PATCH] =?UTF-8?q?Premier=20commit=20-=20Inclusion=20dans=20le=20?=
 =?UTF-8?q?syst=C3=A8me=20git?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .gitignore                              |   4 +
 src/com/bernard/freq/TestsMain.java     |  53 +++++++
 src/com/bernard/freq/TextFrequence.java | 189 ++++++++++++++++++++++++
 3 files changed, 246 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 src/com/bernard/freq/TestsMain.java
 create mode 100644 src/com/bernard/freq/TextFrequence.java

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..572533e
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+.classpath
+.settings
+.project
+bin/
diff --git a/src/com/bernard/freq/TestsMain.java b/src/com/bernard/freq/TestsMain.java
new file mode 100644
--- /dev/null
+++ b/src/com/bernard/freq/TestsMain.java
@@ -0,0 +1,53 @@
+package com.bernard.freq;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+/**
+ * Manual experiment: builds the letter-pair table of a reference text, then prints how far every
+ * file of the corpus folder is from it.
+ */
+public class TestsMain {
+
+    /** Folder holding the reference text and the texts compared against it. */
+    private static final Path CORPUS_DIR = Paths.get("/media/samy/Samy's data/Data/Gutenberg/");
+
+    /**
+     * Runs the experiment on the hard-coded corpus folder.
+     *
+     * @param args unused
+     * @throws IOException if the reference text, the folder or one of its files cannot be read
+     */
+    public static void main(String[] args) throws IOException {
+        String texte = readText(CORPUS_DIR.resolve("La vie I ROBERT BURNS.txt"));
+        System.out.println("Fichier lu ");
+        TextFrequence freqBase = new TextFrequence(texte);
+        freqBase.outputFrequences(Paths.get("out.html"), 10);
+        TextFrequence.generateRandomText(CORPUS_DIR.resolve("Je suis un texte aleatoire.txt"), 30000,
+                freqBase.alphabet);
+
+        // listFiles() returns null (not an empty array) when the folder is missing or unreadable.
+        File[] files = CORPUS_DIR.toFile().listFiles();
+        if (files == null) {
+            throw new IOException("Cannot list directory " + CORPUS_DIR);
+        }
+        for (File f : files) {
+            if (!f.isFile()) {
+                continue; // a sub-directory cannot be read as a text
+            }
+            String txt = readText(f.toPath());
+            System.out.println("Fichier lu :" + f.getName());
+            TextFrequence freqTxt = new TextFrequence(txt);
+            System.out.println("\tError : " + TextFrequence.getFitness(freqBase, freqTxt));
+        }
+    }
+
+    /** Reads a whole file as UTF-8 instead of relying on the platform default charset. */
+    private static String readText(Path file) throws IOException {
+        return new String(Files.readAllBytes(file), StandardCharsets.UTF_8);
+    }
+}
diff --git a/src/com/bernard/freq/TextFrequence.java b/src/com/bernard/freq/TextFrequence.java
new file mode 100644
--- /dev/null
+++ b/src/com/bernard/freq/TextFrequence.java
@@ -0,0 +1,189 @@
+package com.bernard.freq;
+
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.math.RoundingMode;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
+import java.text.Normalizer;
+import java.text.Normalizer.Form;
+import java.util.Arrays;
+import java.util.Locale;
+import java.util.PrimitiveIterator.OfInt;
+import java.util.Random;
+import java.util.regex.Pattern;
+import java.util.stream.IntStream;
+
+/**
+ * Letter-pair (bigram) statistics of a text: for every ordered pair of alphabet symbols, how often
+ * the first one is immediately followed by the second one.
+ */
+public class TextFrequence {
+    /** Combining marks left over once a character has been decomposed with NFD. */
+    private static final Pattern DIACRITICS = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");
+    /** Charset of every file written by this class. */
+    private static final Charset UTF_8 = StandardCharsets.UTF_8;
+
+    /** Counted symbols; the position of a symbol in this string is its row/column index. */
+    String alphabet = "abcdefghijklmnopqrstuvwxyz ";
+    /** frequences[a][b] = sums[a][b] divided by the total of row a of sums (0 for an empty row). */
+    BigDecimal[][] frequences;
+    /** sums[a][b] = number of times a is immediately followed by b (number of "ab"). */
+    long[][] sums;
+    /** Index used for every character missing from the alphabet; 26 is the space. */
+    int alphabetJokerChar = 26;
+    /** Number of decimals kept when the frequencies are computed. */
+    public int precision = 100;
+
+    /**
+     * Builds the table from the UTF-16 code units of a string.
+     *
+     * @param str text to analyse; may be empty
+     */
+    public TextFrequence(String str) {
+        this(str.chars());
+    }
+
+    /**
+     * Builds the table from a stream of UTF-16 code units.
+     *
+     * @param stream characters of the text, in reading order; an empty stream gives an all-zero table
+     */
+    public TextFrequence(IntStream stream) {
+        int size = alphabet.length();
+        sums = new long[size][size]; // Java zero-fills new arrays
+        frequences = new BigDecimal[size][size];
+
+        // Count the pairs. nextInt() keeps the full 16 bits of each character: going through
+        // byteValue() would turn 'é' (0xE9) into 0xFFE9 and 'ő' (0x151) into 'Q'.
+        OfInt iterator = stream.iterator();
+        if (iterator.hasNext()) {
+            int last = getAlphabetPos((char) iterator.nextInt());
+            while (iterator.hasNext()) {
+                int current = getAlphabetPos((char) iterator.nextInt());
+                sums[last][current]++;
+                last = current;
+            }
+        }
+
+        // Turn each row of counts into proportions of that row.
+        for (int i = 0; i < size; i++) {
+            long rowTotal = 0L;
+            for (int j = 0; j < size; j++) {
+                rowTotal += sums[i][j];
+            }
+            // A symbol that never starts a pair keeps an all-zero row; dividing by 1 avoids 0/0.
+            BigDecimal divisor = BigDecimal.valueOf(Math.max(rowTotal, 1L));
+            for (int j = 0; j < size; j++) {
+                frequences[i][j] = BigDecimal.valueOf(sums[i][j]).divide(divisor, precision, RoundingMode.DOWN);
+            }
+        }
+    }
+
+    @Override
+    public String toString() {
+        return "TextFrequence" + Arrays.deepToString(frequences);
+    }
+
+    /**
+     * Writes the frequency table as an HTML table, replacing any previous content of the file. An
+     * I/O failure is reported on standard error and not rethrown.
+     *
+     * @param out file to write
+     * @param precision number of decimals printed for each frequency (rounded down)
+     */
+    public void outputFrequences(Path out, int precision) {
+        // NOTE(review): the tags below are reconstructed from the table structure; confirm the
+        // prolog line against the author's copy.
+        StringBuilder builder = new StringBuilder();
+        builder.append("<!DOCTYPE html>\n");
+        builder.append("<table>\n\t<tr>\n");
+        builder.append("\t\t<th></th>\n");
+        for (int i = 0; i < alphabet.length(); i++) {
+            builder.append("\t\t<th>'" + alphabet.charAt(i) + "'</th>\n");
+        }
+        builder.append("\t</tr>\n");
+        for (int i = 0; i < frequences.length; i++) {
+            builder.append("\t<tr>\n");
+            builder.append("\t\t<th>'" + alphabet.charAt(i) + "'</th>\n");
+            for (int j = 0; j < frequences[i].length; j++) {
+                BigDecimal bigDecimals = frequences[i][j].setScale(precision, RoundingMode.DOWN);
+                builder.append("\t\t<td>" + bigDecimals.toString() + "</td>\n");
+            }
+            builder.append("\t</tr>\n");
+        }
+        builder.append("</table>\n");
+        try {
+            // TRUNCATE_EXISTING: WRITE alone would leave the tail of a longer, older file in place.
+            Files.write(out, builder.toString().getBytes(UTF_8), StandardOpenOption.CREATE,
+                    StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE);
+        } catch (IOException e) {
+            System.err.println("Failed to write the array in a file");
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * Maps a character to its index in the alphabet, ignoring case and accents.
+     *
+     * @param character character to look up
+     * @return index of the matching symbol, or alphabetJokerChar when there is none
+     */
+    public int getAlphabetPos(char character) {
+        String decomposed = Normalizer.normalize(String.valueOf(character), Form.NFD);
+        String bare = DIACRITICS.matcher(decomposed.toLowerCase(Locale.ROOT)).replaceAll("");
+        // A lone combining mark strips down to "", and indexOf("") is 0: it would count as 'a'.
+        int alphabetCharPos = bare.length() == 1 ? alphabet.indexOf(bare.charAt(0)) : -1;
+        if (alphabetCharPos == -1) {
+            alphabetCharPos = alphabetJokerChar;
+        }
+        return alphabetCharPos;
+    }
+
+    /**
+     * Measures how different two tables are.
+     *
+     * @param base reference table
+     * @param toTest table compared with the reference
+     * @return sum, over all pairs, of the absolute difference between the two frequencies
+     */
+    public static BigDecimal getFitness(TextFrequence base, TextFrequence toTest) {
+        BigDecimal error = BigDecimal.ZERO;
+        for (int i = 0; i < base.alphabet.length(); i++) {
+            for (int j = 0; j < base.alphabet.length(); j++) {
+                error = error.add(base.frequences[i][j].subtract(toTest.frequences[i][j]).abs());
+            }
+        }
+        return error;
+    }
+
+    /**
+     * Writes size characters, each taken at a random position of alphabet, replacing any previous
+     * content of the file. An I/O failure is reported on standard error and not rethrown.
+     *
+     * @param out file to write
+     * @param size number of characters to generate
+     * @param alphabet characters to draw from
+     * @throws IllegalArgumentException if characters are requested from an empty alphabet
+     */
+    public static void generateRandomText(Path out, long size, String alphabet) {
+        if (size > 0 && alphabet.isEmpty()) {
+            throw new IllegalArgumentException("alphabet must not be empty");
+        }
+        Random r = new Random();
+        // Streaming through a writer avoids the quadratic cost of growing a String char by char.
+        try (BufferedWriter writer = Files.newBufferedWriter(out, UTF_8, StandardOpenOption.CREATE,
+                StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE)) {
+            for (long i = 0; i < size; i++) {
+                writer.write(alphabet.charAt(r.nextInt(alphabet.length())));
+            }
+        } catch (IOException e) {
+            System.err.println("Impossible de crér le fichier");
+            e.printStackTrace();
+        }
+    }
+}