#!/bin/bash
#
# Build an ASCII-only text sample from random French Wikipedia articles.
#
# Repeatedly downloads a random article, keeps the HTML between the
# element carrying id="firstHeading" and the one carrying id="Notes",
# extracts the text with trafilatura, transliterates it to ASCII and
# appends it to the output file until that file holds at least `limit`
# bytes.
#
# Requires: curl, awk, trafilatura, iconv, wc.
# Output:   wikifile.ascii.txt in the current directory (recreated on
#           every run); progress messages on stdout, errors on stderr.
# Exit:     0 once the size limit is reached, 1 if a required tool is
#           missing or several consecutive rounds add no data.

set -euo pipefail

readonly outputfile="wikifile.ascii.txt"
readonly limit=10000
readonly random_page_url="https://fr.wikipedia.org/wiki/Sp%C3%A9cial:Page_au_hasard"
# Consecutive rounds allowed to add nothing before giving up; without
# this bound a dead network or a broken tool would loop forever.
readonly max_stalled_rounds=5

# Print an error message on stderr and abort the script.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Print the size in bytes of the file given as $1.
file_size() {
  local bytes
  bytes=$(wc -c < "$1") || return
  # Arithmetic expansion strips the padding some wc implementations add.
  printf '%s\n' "$((bytes))"
}

# Keep only the lines from the first one matching id="firstHeading"
# (inclusive) up to the first later one matching id="Notes" (exclusive).
# Reads HTML on stdin, writes the selected lines on stdout. Input is read
# to the end instead of exiting early so the upstream curl never gets a
# broken pipe.
article_body() {
  awk '
    past_notes                 { next }
    /id="firstHeading"/        { in_article = 1 }
    in_article && /id="Notes"/ { past_notes = 1; next }
    in_article                 { print }
  '
}

for tool in curl awk trafilatura iconv wc; do
  command -v "$tool" > /dev/null || die "required tool not found: $tool"
done

# Start from an empty file on every run.
rm -f -- "$outputfile"
: > "$outputfile"

stalled_rounds=0
size=$(file_size "$outputfile")

while ((size < limit)); do
  echo "Current file size: $size"

  # A failing stage must not abort the whole run: a single bad page is
  # simply skipped, and iconv may report a non-zero status when it has to
  # substitute characters it cannot transliterate. Persistent failures are
  # caught by the stall counter below (curl -S still prints its own
  # errors on stderr).
  curl -sSL --fail "$random_page_url" \
    | article_body \
    | trafilatura \
    | iconv -f utf-8 -t ascii//TRANSLIT \
    >> "$outputfile" || true

  new_size=$(file_size "$outputfile")
  if ((new_size > size)); then
    stalled_rounds=0
  else
    stalled_rounds=$((stalled_rounds + 1))
    ((stalled_rounds < max_stalled_rounds)) \
      || die "no data added in $max_stalled_rounds consecutive attempts"
  fi
  size=$new_size
done