Sunday, June 10, 2007 1:05 PM
Today, we have plenty of RSS sources such as Syndic8, FeedBurner, and many more that I am either not listing here or do not know about.
Now, the problem with these RSS repositories is that there are too many RSS sources, but only a few are really useful. Suppose you want a really useful list of RSS feeds: you have imported a list of feeds, but you don't really know which ones to recommend to users. One thing you can do is score the RSS feeds with different values on the basis of some criteria, so that the useful ones rise above the others.
The question is, what could the criteria be for finding really useful RSS feeds? To me, the most useful ones are:
- RSS feeds that have images.
- RSS feeds that contain the full post.
- RSS feeds in which there is at least one recent post.
I achieved that by writing a small piece of C# code that gets the RSS content for a provided URL and then does some content parsing and some Regex matching to sort things exactly the way I need.
Here is the code snippet.
You can wrap this in a function and call it for each RSS URL you have in your repository.
// Downloads one RSS feed and inspects its first few <item> elements to find out
// whether the feed carries images, full posts, and a sufficiently recent post.
// The "do something useful here" comments mark the places where the caller is
// expected to record a score for the feed.
try
{
    // Highest number of items to inspect; the number can be anything you need.
    const int maxItemsToCheck = 5;
    // An item older than this many days marks the feed as stale (roughly two weeks).
    const int staleAfterDays = 13;

    // IgnoreCase + CultureInvariant replaces content.ToLower(), whose result
    // depends on the current culture (e.g. "IMG" does not lower-case to "img"
    // under Turkish rules).
    Regex imageTag = new Regex("<img.[^/]*/[^>]*>", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
    Regex anyTag = new Regex("<[^>]*>");

    WebRequest request = WebRequest.Create("##GIVE THE URL FOR YOUR NEED##");
    XmlDocument doc = new XmlDocument();

    // Disposing the response releases the connection and closes its stream.
    using (WebResponse response = request.GetResponse())
    {
        doc.Load(response.GetResponseStream());
    }

    XmlNodeList nodes = doc.GetElementsByTagName("item");
    bool weekMarked = false;

    // Use a counter to go through a couple of items only.
    int count = 0;
    foreach (XmlNode node in nodes)
    {
        if (count == maxItemsToCheck)
        {
            break;
        }
        count++;

        XmlElement element = node as XmlElement;
        if (element == null)
        {
            continue;
        }

        foreach (XmlNode channelItem in element.ChildNodes)
        {
            // Some feeds use content:encoded to embed their whole content.
            if (channelItem.Name == "content:encoded")
            {
                // Contains full posting, do something useful here.
                string encodedContent = channelItem.InnerText;
                if (imageTag.IsMatch(encodedContent))
                {
                    // Contains image, do something useful here.
                }
            }
            // Feeds that do not use content:encoded: look inside the description tag.
            else if (string.Equals(channelItem.Name, "description", StringComparison.OrdinalIgnoreCase))
            {
                foreach (XmlNode descriptionPart in channelItem.ChildNodes)
                {
                    // Descriptions arrive either as escaped text or wrapped in CDATA;
                    // both node types hold the post body.
                    if (descriptionPart.NodeType != XmlNodeType.Text
                        && descriptionPart.NodeType != XmlNodeType.CDATA)
                    {
                        continue;
                    }

                    string content = descriptionPart.InnerText;

                    // Only the first line of the description is examined.
                    int lineBreak = content.IndexOf("\r\n", StringComparison.Ordinal);
                    if (lineBreak > 0)
                    {
                        content = content.Substring(0, lineBreak);
                    }

                    if (imageTag.IsMatch(content))
                    {
                        // Contains image, do something useful here.
                    }

                    if (anyTag.IsMatch(content))
                    {
                        // Contains full post, do something useful here.
                    }
                    else
                    {
                        // No markup at all: partial post, do something useful here.
                    }
                }
            }
            else if (!weekMarked && string.Equals(channelItem.Name, "pubdate", StringComparison.OrdinalIgnoreCase))
            {
                // Feed dates use English day/month names, so parse them with the
                // invariant culture. A date that cannot be parsed is skipped instead
                // of aborting the check of the whole feed.
                DateTime published;
                bool parsed = DateTime.TryParse(
                    channelItem.InnerText.Trim(),
                    System.Globalization.CultureInfo.InvariantCulture,
                    System.Globalization.DateTimeStyles.None,
                    out published);

                if (parsed && DateTime.Now.AddDays(-staleAfterDays) > published)
                {
                    // Two weeks old, add something useful for you.
                    weekMarked = true;
                }
            }
        }
    }
}
catch (Exception ex)
{
    // The check stays best-effort (an unreachable or malformed feed must not stop
    // the run over the remaining URLs), but the failure is reported, not hidden.
    System.Diagnostics.Trace.TraceWarning("RSS feed check failed: " + ex.Message);
}