#!/bin/sh
#
# This script removes the HTML formatting from a file. If the file was designed
# with such use in mind and was properly formatted besides HTML (such as the
# README file for ttf2pt1) it will look good as a plain text file.
#
# The HTML is read from standard input and the plain text is written to
# standard output.
#
# This script supports a very limited set of HTML formatting:
#   - Everything up to and including the line that contains <BODY> is
#     removed. If the input has no <BODY> line, nothing is printed.
#   - Any lines that start with "<!" are completely removed.
#   - Any lines that contain only HTML formatting are completely removed.
#     Lines that were blank to begin with are kept, so paragraph breaks
#     survive.
#   - Then all the in-line formatting (anything of the form <...>) is removed.
#   - Then "&nbsp;", "&lt;", "&gt;", "&amp;" are changed to " ", "<", ">", "&".
#
# NOTE(review): an earlier, damaged copy of this script also appeared to
# replace one specific tag with "-"; the tag name could not be recovered, so
# that substitution is not reproduced here -- confirm against upstream.

# strip_html: filter standard input (HTML) to standard output (plain text).
# Returns the exit status of sed.
strip_html() {
  sed '
    # Drop the document head: line 1 through the line holding <BODY>.
    1,/<[bB][oO][dD][yY]>/d

    # Drop declaration and comment lines.
    /^<!/d

    # A line that is already blank is a paragraph break: print it untouched
    # and skip the remaining commands, so it is not mistaken for a
    # formatting-only line below.
    /^ *$/b

    # Remove every tag. Literal angle brackets are still encoded as entities
    # at this point, so they cannot be stripped by accident.
    s/<[^<>]*>//g

    # Whatever is empty now consisted of nothing but formatting.
    /^ *$/d

    # Decode entities. The ampersand goes last so that text such as
    # "&amp;lt;" is decoded exactly once (to "&lt;") and not twice.
    s/&[nN][bB][sS][pP];/ /g
    s/&[lL][tT];/</g
    s/&[gG][tT];/>/g
    s/&[aA][mM][pP];/\&/g
  '
}

strip_html