According to Joseph Webber, extracting the text ('foo') from <h1>foo</h1> without using JavaScript is a challenging task. However, creating a table of contents from selected WordPress blog posts can be achieved with the following JS script. It assumes a specific structure:
<div id="pagecontent">
<ul>
<li><a ...>1st Title</a></li>
...
<li><a ...>last Title</a></li>
</ul>
...
<h1 ...>1st Title</h1>
...
<h1 ...>last Title</h1>
</div>
<script>
/*
 * On page load, link the table-of-contents entries to the headers inside the
 * element with id "pagecontent".
 *
 * The element's HTML is processed line by line:
 *   - a line containing "<h1" gets an id attribute derived from the header text;
 *   - otherwise, a line containing "<a" whose opening tag ends in 'a">' gets its
 *     attributes replaced by href="#<same derived text>".
 * Both sides derive the text through getLinkText(), so a TOC entry and a header
 * with the same title end up with matching anchor names.
 */
window.onload = function () {
  const container = document.getElementById("pagecontent");
  if (container === null) {
    return; /* page without the expected wrapper: nothing to rewrite */
  }

  const lines = container.innerHTML.split('\n'); /* work on the HTML text, one line at a time */
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    const posH = line.indexOf('<h1'); /* position of an <h1> header, -1 if none */
    const posA = line.indexOf('<a'); /* position of an <a> anchor, -1 if none */

    if (posH > -1) {
      const linkText = getLinkText(line, 'h1');
      /* insert directly after "<h1" so that both "<h1>" and "<h1 attr=...>" stay well-formed */
      lines[i] = line.substring(0, posH + 3) + ' id="' + linkText + '"' + line.substring(posH + 3);
    } else if (posA > -1) {
      /* end of the opening tag as this script expects it; search only from the anchor onwards */
      const tagEnd = line.indexOf('a">', posA);
      if (tagEnd === -1) {
        continue; /* anchor is not in the expected shape: leave the line untouched */
      }
      const linkText = getLinkText(line, 'a');
      /* keep "<", write the new href, then resume at the closing quote of the old tag */
      lines[i] = line.substring(0, posA + 1) + 'a href="#' + linkText + line.substring(tagEnd + 1);
    }
  }

  container.innerHTML = lines.join('\n'); /* write the rewritten lines back into the page */
};
/**
 * Derive an anchor name from the text of the first <htmlToken ...>...</htmlToken>
 * element found on a line of HTML.
 *
 * @param {string} line - One line of HTML text.
 * @param {string} htmlToken - Tag name without brackets, e.g. 'h1' or 'a'.
 * @returns {string} The element text with every character other than
 *   A-Z, a-z and 0-9 replaced by '-'; '' when no complete element is found.
 */
function getLinkText(line, htmlToken) {
  /* escape the token so it is matched literally inside the pattern */
  const escapedToken = htmlToken.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  /* opening tag: "<token>" or "<token attrs>", but not a longer tag name such as "<abbr>" for 'a' */
  const openTag = new RegExp('<' + escapedToken + '(?:\\s[^>]*)?>').exec(line);
  if (openTag === null) {
    return '';
  }

  const textStart = openTag.index + openTag[0].length;
  const textEnd = line.indexOf('</' + htmlToken + '>', textStart);
  if (textEnd === -1) {
    return '';
  }

  /* "A-z" would also let [ \ ] ^ _ ` through, so spell out both letter ranges */
  return line.substring(textStart, textEnd).replace(/[^A-Za-z0-9]/g, '-');
}
</script>