1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
#!/bin/bash
#
# Convert an iTop wiki (DokuWiki) page into GitHub-flavoured Markdown whose
# note boxes use the mkdocs admonition syntax ("!!! warning", "!!! tip", ...).
#
# Usage:    itop_wiki.sh itop_wiki_url
#   itop_wiki_url  Either a URL starting with https://www.itophub.io (fetched
#                  with curl) or the path of a locally saved copy of the page.
# Output:   itop_wiki.md in the current directory.
# Requires: xmllint, pandoc, GNU sed (in-place editing and "\t" in the
#           replacement text), and curl when a URL is given.

set -euo pipefail

readonly SITE="https://www.itophub.io"
readonly MD=itop_wiki.md

# Print a message on stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Store the raw HTML of the page in $2.
# $1 is downloaded when it starts with $SITE, otherwise copied as a local file.
fetch_page() {
  local src=$1 dest=$2

  if [[ "$src" == "$SITE"* ]]; then
    command -v curl >/dev/null || die "curl is required to download $src"
    # -f: treat HTTP errors as failures instead of saving the error page.
    curl -fsS "$src" -o "$dest" || die "failed to download $src"
  else
    [[ -f "$src" ]] || die "no such file: $src"
    cp -f -- "$src" "$dest"
  fi
}

# Extract the DokuWiki content <div> from HTML file $1 (using $2 as scratch
# file) and write it to $MD as GitHub-flavoured Markdown.
extract_markdown() {
  local page=$1 content=$2

  # Real-world HTML routinely triggers parser diagnostics, so stderr is
  # discarded and success is judged by the extracted output, not the status.
  xmllint --html --xpath "//div[@id='dokuwiki__content']" "$page" \
    >"$content" 2>/dev/null || true
  [[ -s "$content" ]] || die "no dokuwiki__content element found in input"

  pandoc -f html -t gfm <"$content" >"$MD"
}

# Clean up the generated Markdown in place.
# The passes run as separate sed invocations, in this order, because each one
# works on the result of the previous one.
postprocess_markdown() {
  # Remove the superfluous lines at the beginning and at the end.
  sed -i '/class="clearer"/,$d' "$MD"
  sed -i '/dokuwiki__content/,/^# /d' "$MD"

  # Convert plugin_note boxes to the mkdocs admonition format: indent the
  # body of each box with a tab, then turn the opening tag into the marker.
  sed -i '/<div class="plugin_note/,/<\/div>/{/<div class="plugin_note/n;/<\/div>/b;s/^/\t/g}' "$MD"
  sed -i 's/<div class="plugin_note noteimportant">/!!! warning/g' "$MD"
  sed -i 's/<div class="plugin_note notewarning">/!!! warning/g' "$MD"
  sed -i 's/<div class="plugin_note notetip">/!!! tip/g' "$MD"
  sed -i 's/<div class="plugin_note noteclassic">/!!! note/g' "$MD"
  sed -i 's/<div class="plugin_note noteinfo">/!!! info/g' "$MD"
  sed -i '/<\/div>/d' "$MD"
  sed -i '/class="level/d' "$MD"

  # Image links: drop the wrapping <a> and make the image URL absolute.
  sed -r -i 's#<a href=.*(<img src.* />)</a>#\1#g' "$MD"
  sed -i 's#src="/wiki/media#src="https://www.itophub.io/wiki/media#g' "$MD"

  # List items.
  sed -i 's/<div class="li">//g' "$MD"

  # Any other remaining div.
  sed -i '/<div class=/d' "$MD"

  # Remove FIXME markers.
  sed -i '/alt="FIXME"/d' "$MD"
}

main() {
  if [[ $# -lt 1 ]]; then
    printf '%s\n' "$0 itop_wiki_url" >&2
    exit 1
  fi

  local tool
  for tool in xmllint pandoc sed; do
    command -v "$tool" >/dev/null || die "required tool not found: $tool"
  done

  # Private scratch directory instead of a predictable name in /tmp;
  # removed on every exit path. Deliberately global so the trap can see it.
  workdir=$(mktemp -d) || die "mktemp failed"
  trap 'rm -rf -- "$workdir"' EXIT

  fetch_page "$1" "$workdir/page.html"
  extract_markdown "$workdir/page.html" "$workdir/content.html"
  postprocess_markdown
}

main "$@"
发表回复