#!/usr/bin/mawk -f
#
# by: Jesus Galan (yiyus) 2009
#
# Usage: md2html.awk file.md > file.html
# See: http://4l77.com/src/md2html.awk
# eschtml(t): return a copy of t in which the HTML-significant characters
# "&" and "<" are replaced by the entities "&amp;" and "&lt;".
# ">" is left untouched.
function eschtml(t) {
	# "&" has to be handled first; otherwise the ampersand of a freshly
	# inserted "&lt;" would be escaped a second time.
	# In a gsub() replacement a bare "&" stands for the matched text, so a
	# literal ampersand is written as "\\&".
	gsub(/&/, "\\&amp;", t);
	gsub(/</, "\\&lt;", t);
	return t;
}
# oprint(t): emit one chunk of output.
# While nr is non-zero the chunk is appended to the otext buffer (preceded
# by a newline) instead of being printed; with nr equal to zero it is
# printed immediately.
function oprint(t) {
	if(nr != 0) {
		otext = otext "\n" t;
		return;
	}
	print t;
}
# subref(id): resolve pending placeholders for reference id.
#
# Each occurrence of the placeholder "<<" id in the buffered output otext is
# replaced by ref[id], decrementing nr for every replacement, until nr drops
# to zero or no placeholder for this id is left. Once nr is zero the buffer
# is printed and cleared.
#
# key and pos are local variables; callers pass id only.
function subref(id,    key, pos) {
	key = "<<" id;
	# The splice is done with index()/substr() rather than sub(): with sub()
	# the id would be interpreted as a regular expression and every "&" in
	# ref[id] (e.g. the "&amp;" of an escaped URL) would expand to the
	# matched placeholder text.
	while(nr > 0 && (pos = index(otext, key)) > 0) {
		otext = substr(otext, 1, pos - 1) ref[id] substr(otext, pos + length(key));
		nr--;
	}
	# Compare against "" explicitly so that a buffer such as "0" is flushed too.
	if(nr == 0 && otext != "") {
		print otext;
		otext = "";
	}
}
# nextil(t): render the inline Markdown in t as HTML and return the result.
#
# The function locates the first special token in t (one of ` < & [ * _ \ -
# or the two-character "!["), renders the construct that token introduces
# and recurses on the remainder of the string.
#
# Global state read and written:
#   ilcode   - non-zero while inside a code span
#   ilcode2  - non-zero while inside a span delimited by double backticks,
#              in which single backticks are literal
#   stag, ns - stack (and its depth) of currently open emphasis markers
#   ref      - reference id -> target text
#   nr       - number of "<<id" placeholders emitted for references that
#              were not defined yet (resolved later by subref())
#
# Everything after t in the parameter list is a local variable; callers pass
# t only. The scratch variables must be local because the function is
# recursive: as globals they were overwritten by nested calls (for example a
# link inside a link text clobbered the saved remainder of the line).
function nextil(t,    t1, tag, t2, pt2, linktext, url, title, alt, id, r, ntag, n) {
	if(!match(t, /[`<&\[*_\\-]|(\!\[)/))
		return t;
	t1 = substr(t, 1, RSTART - 1);
	tag = substr(t, RSTART, RLENGTH);
	t2 = substr(t, RSTART + RLENGTH);
	# Inside a code span everything except a backtick is literal text.
	if(ilcode && tag != "`")
		return eschtml(t1 tag) nextil(t2);
	# Backslash escaping
	if(tag == "\\"){
		if(match(t2, /^[\\`*_{}\[\]()#+\-\.!]/)){
			tag = substr(t2, 1, 1);
			t2 = substr(t2, 2);
		}
		return t1 tag nextil(t2);
	}
	# Dashes: "--" becomes an em dash, a single "-" stays as it is.
	# The dash is written as a numeric entity so the output does not depend
	# on the encoding of this script.
	if(tag == "-"){
		if(sub(/^-/, "", t2))
			tag = "&#8212;";
		return t1 tag nextil(t2);
	}
	# Inline Code
	if(tag == "`"){
		if(sub(/^`/, "", t2)){
			# A double backtick with no matching pair later on the line is
			# rendered as a closing double quote.
			if(!match(t2, /``/))
				return t1 "&#8221;" nextil(t2);
			ilcode2 = !ilcode2;
		}
		else if(ilcode2)
			return t1 tag nextil(t2);
		tag = "<code>";
		if(ilcode){
			t1 = eschtml(t1);
			tag = "</code>";
		}
		ilcode = !ilcode;
		return t1 tag nextil(t2);
	}
	if(tag == "<"){
		# Autolinks
		if(match(t2, /^[^ ]+[\.@][^ ]+>/)){
			url = eschtml(substr(t2, 1, RLENGTH - 1));
			t2 = substr(t2, RLENGTH + 1);
			linktext = url;
			if(match(url, /@/) && !match(url, /^mailto:/))
				url = "mailto:" url;
			return t1 "<a href=\"" url "\">" linktext "</a>" nextil(t2);
		}
		# Html tags
		if(match(t2, /^[A-Za-z\/!][^>]*>/)){
			tag = tag substr(t2, RSTART, RLENGTH);
			t2 = substr(t2, RLENGTH + 1);
			return t1 tag nextil(t2);
		}
		# A "<" that starts neither an autolink nor a tag is plain text.
		return t1 "&lt;" nextil(t2);
	}
	# Html special entities
	if(tag == "&"){
		if(match(t2, /^#?[A-Za-z0-9]+;/)){
			tag = tag substr(t2, RSTART, RLENGTH);
			t2 = substr(t2, RLENGTH + 1);
			return t1 tag nextil(t2);
		}
		# A "&" that does not start an entity is plain text.
		return t1 "&amp;" nextil(t2);
	}
	# Images
	if(tag == "!["){
		if(!match(t2, /(\[.*\])|(\(.*\))/))
			return t1 tag nextil(t2);
		match(t2, /^[^\]]*/);
		alt = substr(t2, 1, RLENGTH);
		t2 = substr(t2, RLENGTH + 2);
		if(match(t2, /^\(/)){
			# Inline
			sub(/^\(/, "", t2);
			match(t2, /^[^\)]+/);
			url = eschtml(substr(t2, 1, RLENGTH));
			t2 = substr(t2, RLENGTH + 2);
			title = "";
			if(match(url, /[ ]+\".*\"[ ]*$/)) {
				title = substr(url, RSTART, RLENGTH);
				url = substr(url, 1, RSTART - 1);
				match(title, /\".*\"/);
				title = " title=\"" substr(title, RSTART + 1, RLENGTH - 2) "\"";
			}
			if(match(url, /^<.*>$/))
				url = substr(url, 2, RLENGTH - 2);
			return t1 "<img src=\"" url "\" alt=\"" alt "\"" title " />" nextil(t2);
		}
		else{
			# Referenced
			sub(/^ ?\[/, "", t2);
			id = alt;
			if(match(t2, /^[^\]]+/)){
				id = substr(t2, 1, RLENGTH);
				t2 = substr(t2, RLENGTH + 2);
			}
			else
				# Implicit form "![alt][]": only the closing bracket is left.
				sub(/^\]/, "", t2);
			if(ref[id])
				r = ref[id];
			else{
				# Not defined yet: emit a placeholder for subref() to fill in.
				r = "<<" id;
				nr++;
			}
			# r may carry its own title attribute, so it closes the src
			# attribute itself when it does; see how ref[] is filled.
			return t1 "<img src=\"" r "\" alt=\"" alt "\" />" nextil(t2);
		}
	}
	# Links
	if(tag == "["){
		if(!match(t2, /(\[.*\])|(\(.*\))/))
			return t1 tag nextil(t2);
		match(t2, /^[^\]]*(\[[^\]]*\][^\]]*)*/);
		linktext = substr(t2, 1, RLENGTH);
		t2 = substr(t2, RLENGTH + 2);
		if(match(t2, /^\(/)){
			# Inline
			match(t2, /^[^\)]+(\([^\)]+\)[^\)]*)*/);
			url = substr(t2, 2, RLENGTH - 1);
			pt2 = substr(t2, RLENGTH + 2);
			title = "";
			if(match(url, /[ ]+\".*\"[ ]*$/)) {
				title = substr(url, RSTART, RLENGTH);
				url = substr(url, 1, RSTART - 1);
				match(title, /\".*\"/);
				title = " title=\"" substr(title, RSTART + 1, RLENGTH - 2) "\"";
			}
			if(match(url, /^<.*>$/))
				url = substr(url, 2, RLENGTH - 2);
			url = eschtml(url);
			# Render the link text first, in its own statement, so the
			# shared inline state is updated in left-to-right order.
			linktext = nextil(linktext);
			return t1 "<a href=\"" url "\"" title ">" linktext "</a>" nextil(pt2);
		}
		else{
			# Referenced
			sub(/^ ?\[/, "", t2);
			id = linktext;
			if(match(t2, /^[^\]]+/)){
				id = substr(t2, 1, RLENGTH);
				t2 = substr(t2, RLENGTH + 2);
			}
			else
				# Implicit form "[text][]": only the closing bracket is left.
				sub(/^\]/, "", t2);
			if(ref[id])
				r = ref[id];
			else{
				# Not defined yet: emit a placeholder for subref() to fill in.
				r = "<<" id;
				nr++;
			}
			linktext = nextil(linktext);
			return t1 "<a href=\"" r "\">" linktext "</a>" nextil(t2);
		}
	}
	# Emphasis
	if(match(tag, /[*_]/)){
		ntag = tag;
		if(sub("^" tag, "", t2)){
			# Three markers in a row while a single one is open: close the
			# single marker first and leave the double one for the next step.
			if(stag[ns] == tag && match(t2, "^" tag))
				t2 = tag t2;
			else
				ntag = tag tag
		}
		# A marker surrounded by spaces is a literal asterisk/underscore.
		if(match(t1, / $/) && match(t2, /^ /))
			return t1 ntag nextil(t2);
		n = length(ntag);
		tag = (n == 2) ? "strong" : "em";
		if(stag[ns] == ntag){
			# Same marker as the innermost open one: this is the closing tag.
			tag = "/" tag;
			ns--;
		}
		else
			stag[++ns] = ntag;
		tag = "<" tag ">";
		return t1 tag nextil(t2);
	}
}
# inline(t): render the inline Markdown of one text block.
# Clears the per-block inline state (code-span flags and the depth of the
# emphasis stack) and hands t to nextil().
function inline(t) {
	ilcode = ilcode2 = ns = 0;
	return nextil(t);
}
# printp(tag): flush the accumulated global text as one block element.
#
# If text contains anything other than spaces it is run through inline()
# and emitted with oprint(), wrapped in <tag>...</tag> when tag is non-empty
# and bare otherwise. text is always reset to the empty string afterwards.
function printp(tag) {
	if(!match(text, /^[ ]*$/)){
		text = inline(text);
		if(tag != "")
			# The closing tag needs its own "</"; without it the element
			# was emitted as e.g. "<p>...p>".
			oprint("<" tag ">" text "</" tag ">");
		else
			oprint(text);
	}
	text = "";
}
# Initial state of the converter.
BEGIN {
	# Flags and counters all start at zero:
	#   code - set while the code rule's closing action is pending
	#   html - tested by the html rule
	#   nl   - index of the innermost entry of block[]
	#   nr   - number of unresolved "<<id" reference placeholders
	#   blank, hr - initialised here; no use is visible in this part of
	#               the file
	blank = code = hr = html = nl = nr = 0;
	# otext buffers output while references are unresolved; text collects
	# the lines of the current paragraph.
	otext = text = "";
	# Tag used by the next printp(par) call.
	par = "p";
}
# References
# Handles a reference definition line of the form
#     [id]: url "optional title"
# outside of code. The HTML-escaped url is stored in ref[id]; when a title is
# present it is appended as  " title="...  (without the final quote, which
# the code emitting the attribute supplies). Pending placeholders for id are
# then resolved with subref() and the line is consumed.
!code && /^ *\[[^\]]*\]:[ ]+/ {
	sub(/^ *\[/, "");
	match($0, /\]/);
	id = substr($0, 1, RSTART - 1);
	# Strip "id]:" and the blanks after it by position. Building a regular
	# expression from id would misbehave for ids containing characters such
	# as ".", "+", "*" or "(".
	$0 = substr($0, RSTART + 1);
	sub(/^:[ ]+/, "");
	title = "";
	if(match($0, /\".*\"$/))
		title = "\" title=\"" substr($0, RSTART + 1, RLENGTH - 2);
	sub(/[ ]+\".*\"$/, "");
	url = eschtml($0);
	ref[id] = url title;
	subref(id);
	next;
}
# html
# Rule for input lines that start with one of the listed block-level HTML
# elements (or an HTML comment) while the html flag is clear.
# Visible behaviour: calls oprint("") when code is set, pops block[] entries
# equal to "blockquote" while text is empty (calling oprint("") for each),
# and extracts the matched element name into htag.
# NOTE(review): this block looks damaged and does not parse as written. The
# string literal opened in the `if(!match($0, ...` statement is never closed
# on its line, and the statements after it (code = 1; eschtml; oprint; next)
# look like the tail of a different rule that was fused onto this one. The
# oprint("") calls presumably emitted closing tags originally. Restore this
# section from a pristine copy of the script; do not guess at the missing
# text.
!html && /^<(address|blockquote|center|dir|div|dl|fieldset|form|h[1-6r]|\
isindex|menu|noframes|noscript|ol|p|pre|table|ul|!--)/ {
if(code)
oprint("");
for(; !text && block[nl] == "blockquote"; nl--)
oprint("");
match($0, /^<(address|blockquote|center|dir|div|dl|fieldset|form|h[1-6r]|\
isindex|menu|noframes|noscript|ol|p|pre|table|ul|!--)/);
htag = substr($0, 2, RLENGTH - 1);
if(!match($0, "(<\\/" htag ">)|((^
");
code = 1;
$0 = eschtml($0);
oprint($0);
next;
}
# Runs for any line that reaches this point while the code flag is set:
# emits one string through oprint() and clears the flag.
# NOTE(review): the string literal passed to oprint() is split across two
# lines, which awk rejects. It presumably held the markup that closes a code
# block - confirm against a pristine copy of the script before repairing.
code {
oprint("
");
code = 0;
}
# Setext-style headers
# A line consisting only of "=" (or only of "-") directly after accumulated
# text turns that text into an h1 (or h2) element.
text && /^=+$/ {
	printp("h1");
	next;
}
text && /^-+$/ {
	printp("h2");
	next;
}
# Atx-style headers
# Strips up to six leading "#" markers (each together with the spaces that
# follow it) from the line, removes one trailing "#" per marker stripped,
# and selects "h<count>" as the tag for the current block. The line itself
# is not consumed here; it falls through to the rules below.
/^#+/ && (!newli || par=="p" || /^##/) {
	n = 0;
	while(n < 6 && sub(/^# */, "")) {
		sub(/#$/, "");
		n++;
	}
	par = "h" n;
}
# Paragraph
# An empty line ends the current block: flush it with the pending tag and
# fall back to plain paragraphs.
/^$/ {
	printp(par);
	par = "p";
	next;
}
# Add text
# Any other line is appended to the current block, separated by one space.
{
	if(text)
		text = text " " $0;
	else
		text = $0;
}
END {
if(code){
oprint("");
code = 0;
}
printp(par);
for(; nl > 0; nl--){
if(match(block[nl], /[ou]l/))
oprint("