28 January 2015

Parse News RSS 2.0 image in C#

Suppose you want to parse the images out of a News RSS 2.0 feed, for example: http://codecanyon.net/feeds/new-codecanyon-items.atom

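The following code can be used to match every img src in the source text and to populate a list with the value of each src attribute. (Imgpattern and Urlpattern are regular-expression pattern strings that are not shown here.)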

/// <summary>
/// Collects image URLs from an HTML string: every case-insensitive match of
/// <c>Imgpattern</c> that mentions a ".jpg" or ".png" extension contributes the
/// first <c>Urlpattern</c> match found inside it, with double quotes removed.
/// </summary>
/// <param name="htmlString">The HTML text to scan. May be null or empty.</param>
/// <returns>
/// The extracted URLs in the order they were found; an empty sequence when
/// <paramref name="htmlString"/> is null or empty or nothing qualifies.
/// </returns>
private static IEnumerable<string> GetImagesInHTMLString(string htmlString)
        {
            var images = new List<string>();

            // Nothing to scan; Regex.Matches would throw on a null input.
            if (string.IsNullOrEmpty(htmlString))
            {
                return images;
            }

            foreach (Match imageMatch in Regex.Matches(htmlString, Imgpattern, RegexOptions.IgnoreCase))
            {
                string candidate = imageMatch.Value;

                // The image pattern is matched case-insensitively, so the extension
                // filter must be too; otherwise "photo.JPG" would be dropped.
                bool hasSupportedExtension =
                    candidate.IndexOf(".jpg", System.StringComparison.OrdinalIgnoreCase) >= 0 ||
                    candidate.IndexOf(".png", System.StringComparison.OrdinalIgnoreCase) >= 0;
                if (!hasSupportedExtension)
                {
                    continue;
                }

                // Only the first URL match is used, so stop searching after it.
                Match urlMatch = Regex.Match(candidate, Urlpattern);
                if (urlMatch.Success && urlMatch.Value.Length > 0)
                {
                    images.Add(urlMatch.Value.Replace("\"", string.Empty));
                }
            }

            return images;
        }

No comments: