#!/bin/bash
#
# wikisourcer - build a small ASCII text corpus from random Wikipedia pages.
#
# Repeatedly fetches the fr.wikipedia.org "Page_au_hasard" URL, keeps the HTML
# lines from the one containing id="firstHeading" up to (but not including)
# the first later line containing id="Notes", pipes them through trafilatura,
# transliterates the result to ASCII with iconv, and appends it to
# $outputfile until that file is at least $limit bytes long.
#
# Usage:    ./wikisourcer
# Requires: curl, awk, trafilatura, iconv
# Output:   wikifile.ascii.txt in the current directory (recreated each run);
#           progress on stdout, diagnostics on stderr.
# Exit:     0 once the size limit is reached, 1 on a missing tool, an
#           unwritable output file, or repeated fetches that add no data.

set -u

readonly outputfile="wikifile.ascii.txt"
readonly limit=10000
readonly random_page_url="https://fr.wikipedia.org/wiki/Sp%C3%A9cial:Page_au_hasard"
# Consecutive fetches allowed to add zero bytes before giving up; without a
# cap, a network outage or a broken tool would make the loop spin forever.
readonly max_stalled_fetches=5

# Print an error message to stderr and abort the script.
die() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
  exit 1
}

#######################################
# Print the size of a file in bytes.
# Arguments: $1 - path to the file
# Outputs:   the byte count on stdout
# Returns:   non-zero if the file cannot be read
#######################################
file_size() {
  local bytes
  bytes=$(wc -c < "$1") || return
  # Arithmetic expansion strips the padding some wc implementations emit.
  printf '%s\n' "$((bytes))"
}

#######################################
# Filter an HTML page on stdin down to its article section.
# Printing starts at the first line matching id="firstHeading" and stops for
# good at the first subsequent line matching id="Notes" (that line is not
# printed). The rest of stdin is still consumed so the upstream writer never
# sees a closed pipe.
#######################################
extract_article_html() {
  awk '
    /id="firstHeading"/        { in_article = 1 }
    in_article && /id="Notes"/ { past_notes = 1 }
    in_article && !past_notes
  '
}

main() {
  local tool size new_size stalled=0

  # Fail fast with a clear message instead of looping on a missing command.
  for tool in curl awk trafilatura iconv; do
    command -v "$tool" > /dev/null || die "required tool not found: $tool"
  done

  # Start from a fresh, empty output file.
  rm -f -- "$outputfile"
  : > "$outputfile" || die "cannot create $outputfile"

  size=$(file_size "$outputfile") || die "cannot read size of $outputfile"

  while ((size < limit)); do
    echo "Current file size: $size"

    curl -sSL "$random_page_url" \
      | extract_article_html \
      | trafilatura \
      | iconv -f utf-8 -t ascii//TRANSLIT \
      >> "$outputfile"

    new_size=$(file_size "$outputfile") || die "cannot read size of $outputfile"

    # Progress is judged by whether the file grew, not by the pipeline's exit
    # status, so a page that simply yields no text counts the same as a
    # failed download.
    if ((new_size > size)); then
      stalled=0
    else
      stalled=$((stalled + 1))
      printf '%s: fetch added no data (%d/%d)\n' \
        "${0##*/}" "$stalled" "$max_stalled_fetches" >&2
      ((stalled < max_stalled_fetches)) \
        || die "giving up after $stalled consecutive fetches without data"
    fi
    size=$new_size
  done
}

main "$@"