When fetching html from a website, how can I extract specific html content instead of getting all of it? I have attempted the following method:
Before appending data to the target below
container.html(data);
I would like to perform something like
data.find('.site-header').html();
and then execute container.html(data);
Is there a way to accomplish this?
HTML
<div id="target"></div>
Script
$(function () {
var container = $('#target');
var url = 'http://aamirshahzad.net';
$.getJSON("http://query.yahooapis.com/v1/public/yql?" +
"q=select%20*%20from%20html%20where%20url%3D%22" + encodeURIComponent(url) +
"%22&format=xml'&callback=?",
function (data) {
if (data.results[0]) {
var filteredData = filterData(data.results[0]);
container.html(filteredData);
} else {
var errormsg = '<p>Error: could not load the page.</p>';
container.html(errormsg).focus().effect('highlight', {
color: '#c00'
}, 1000);
}
});
});
function filterData(data) {
// filter all unwanted content
// no body tags
data = data.replace(/<?\/body[^>]*>/g, '');
// no linebreaks
data = data.replace(/[\r|\n]+/g, '');
// no comments
data = data.replace(/<--[\S\s]*?-->/g, '');
// no noscript blocks
data = data.replace(/<noscript[^>]*>[\S\s]*?<\/noscript>/g, '');
// no script blocks
data = data.replace(/<script[^>]*>[\S\s]*?<\/script>/g, '');
// no self closing scripts
data = data.replace(/<script.*\/>/, '');
// [... add more filters as needed ...]
return data;
}