19 lines
554 B
Bash
19 lines
554 B
Bash
#!/bin/bash
#
# Build an ASCII-only text file from random French Wikipedia articles.
#
# Each iteration downloads a random article, keeps the HTML from the line
# containing id="firstHeading" up to (but not including) the first following
# line containing id="Notes", extracts the text with trafilatura,
# transliterates it to ASCII with iconv and appends the result to $outputfile.
# The loop stops once $outputfile holds at least $limit bytes.
#
# Requires: curl, awk, trafilatura, iconv (with //TRANSLIT support).
# Exit status: 0 once the size limit is reached, 1 on a missing tool, an
# unwritable output file, or too many consecutive fetches that added nothing.

set -u

outputfile="wikifile.ascii.txt"
limit=10000      # target size of $outputfile, in bytes
max_failures=5   # consecutive iterations allowed to append nothing

# Print the size in bytes of the file named by $1.
file_size() {
  # The arithmetic expansion drops the padding some wc implementations emit.
  printf '%d\n' "$(( $(wc -c < "$1") ))"
}

# Fail early: a missing tool would otherwise make every iteration append
# nothing and the loop would never reach the size limit.
for tool in curl awk trafilatura iconv; do
  if ! command -v "$tool" > /dev/null 2>&1; then
    printf 'error: required command not found: %s\n' "$tool" >&2
    exit 1
  fi
done

# Start from an empty output file on every run.
rm -f -- "$outputfile"
if ! touch -- "$outputfile"; then
  printf 'error: cannot create %s\n' "$outputfile" >&2
  exit 1
fi

failures=0
size=$(file_size "$outputfile")

while (( size < limit )); do
  echo "Current file size: $size"

  # --fail: do not feed HTTP error pages into the extraction pipeline.
  # --max-time: do not let a stalled connection hang the loop forever.
  # The awk filter keeps consuming input after the end marker (instead of
  # exiting) so curl never sees a closed pipe.
  curl -sSL --fail --max-time 30 \
    "https://fr.wikipedia.org/wiki/Sp%C3%A9cial:Page_au_hasard" \
    | awk '
        done_printing { next }
        !in_article && /id="firstHeading"/ { in_article = 1 }
        in_article && /id="Notes"/ { done_printing = 1; next }
        in_article { print }
      ' \
    | trafilatura \
    | iconv -f utf-8 -t ascii//TRANSLIT \
    >> "$outputfile"
  statuses="${PIPESTATUS[*]}"

  new_size=$(file_size "$outputfile")
  if (( new_size > size )); then
    failures=0
  else
    # Nothing was appended: count it so a persistent problem (network down,
    # page layout change, ...) ends the script instead of looping forever.
    failures=$(( failures + 1 ))
    printf 'warning: no text appended (pipeline exit codes: %s), attempt %d/%d\n' \
      "$statuses" "$failures" "$max_failures" >&2
    if (( failures >= max_failures )); then
      printf 'error: giving up after %d consecutive empty fetches\n' \
        "$failures" >&2
      exit 1
    fi
  fi
  size=$new_size
done