SEO Processor

Created: 15 Mar 2016, last update: 30 Jan 2022

Sitecore SEO Processor: an XML sitemap and robots.txt pipeline processor

I have updated the multisite and multilingual SEO XML sitemap for search engines. The previous version was kept as simple as possible: little code with maximum usefulness (including changefreq and priority). The old version may still be a great fit for you, but this one gives you an additional choice. To make it as editor-friendly as possible you need to integrate it with your own site, and that is hard to do well in a ready-made module. So I have not published it as a module on the Marketplace; it is just some code you can use and adjust. See the SEO XML sitemap article for the other options and the modules on the Marketplace.

Why this one:

  • Multisite
  • Multilingual websites
  • Easy to modify
  • Support for changefreq and priority
  • https and http

The changes:

  • Integrated with a dynamic robots.txt: you need the robots.txt to give search engines the URL of the sitemap, and because the Sitecore security hardening recommendations don't allow the .xml extension, the sitemap is served as /sitemap-xml instead of /sitemap.xml.
  • Instead of using a layout, which requires a sitemap item below your root node, I chose a pipeline processor. The only drawback is that layout caching can no longer be used, so a custom Sitecore cache was added.
  • This implementation does not need a query with a where clause, so instead of a Sitecore fast query it uses Axes.GetDescendants(). That uses the item cache, which is better than getting the items from the database. You can change this and use the Sitecore search API instead: index your fields, add them to the result set and use those.
  • Instead of a separate "hide/display in sitemap" field, a robots meta tag field is reused (index, follow / noindex, nofollow). This field can also be used for the <meta name="robots" CONTENT="index, follow" /> tag.
  • Update the validation regular expression for the Sitemap Priority Field

Create the Sitemap and robots.txt
First, create some property items in Sitecore for the Meta Robots values and the sitemap change frequencies.

Create the page settings fields for all of your site's page templates.

 

For the XML Sitemap Priority field, use regex validation; this one, ^$|(^([0]\.[0-9]$)|(^[1]$)), limits the input to an empty value or a value from 0.0 to 1.
Also create a field for the robots.txt content: a Multi-Line Text field on your Site Config item or your home item.

See the SEO XML Sitemap article for more details.

SeoProcessor.cs

using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using Sitecore.Data.Items;
using Sitecore.Pipelines.HttpRequest;

namespace SeoProcessor
{
    /// <summary>
    /// HTTP request pipeline processor that answers requests for <c>robots.txt</c> and
    /// for the XML sitemap (served as <c>sitemap-xml</c>) directly, instead of letting
    /// the rest of the pipeline resolve an item for them.
    /// </summary>
    public class SeoProcessor : HttpRequestProcessor
    {
        /// <summary>Final path segment that identifies a robots.txt request.</summary>
        private const string RobotsPath = "/robots.txt";

        /// <summary>Final path segment that identifies an XML sitemap request.</summary>
        private const string SitemapPath = "/sitemap-xml";

        /// <summary>
        /// Inspects the path of the current request and writes the robots.txt or the
        /// XML sitemap response when the path ends with the corresponding segment.
        /// All other requests are left untouched.
        /// </summary>
        /// <param name="args">The pipeline arguments (not used by this processor).</param>
        public override void Process(HttpRequestArgs args)
        {
            HttpContext context = HttpContext.Current;

            if (context == null)
            {
                return;
            }

            // Match on the path only: the full URL also contains the query string, so
            // "/page?x=sitemap-xml" would otherwise be answered with the sitemap and
            // "/robots.txt?x=1" would be missed. A trailing slash is tolerated for both.
            string path = context.Request.Url.AbsolutePath.TrimEnd('/');

            if (string.IsNullOrEmpty(path))
            {
                return;
            }

            // Ordinal comparison keeps the match independent of the thread culture.
            if (path.EndsWith(RobotsPath, StringComparison.OrdinalIgnoreCase))
            {
                ProcessRobots(context);
            }
            else if (path.EndsWith(SitemapPath, StringComparison.OrdinalIgnoreCase))
            {
                ProcessSitemap(context);
            }
        }

        /// <summary>
        /// Writes the robots.txt response. A built-in default is used unless the
        /// "Site Robots TXT" field of the "Site Config" item has a value; a Sitemap
        /// line is appended to the configured text when it does not already have one
        /// pointing at <c>/sitemap-xml</c>.
        /// </summary>
        /// <param name="context">The HTTP context of the current request.</param>
        private static void ProcessRobots(HttpContext context)
        {
            Uri requestUrl = context.Request.Url;
            string sitemapLine = "Sitemap: " + requestUrl.Scheme + Uri.SchemeDelimiter + requestUrl.Host + "/sitemap-xml";

            // The default robots.txt; may be overridden by the value of the
            // "Site Robots TXT" field in the "Site Config" item.
            string robotsTxtContent = @"User-agent: *" + Environment.NewLine +
                                      "Disallow: /Lightbox/" + Environment.NewLine +
                                      "Disallow: /lightbox/" + Environment.NewLine + Environment.NewLine +
                                      sitemapLine;

            // GetConfigNode returns null when there is no context site or database.
            Item siteconfigNode = GetConfigNode();

            if (siteconfigNode != null)
            {
                string configRobotTxt = siteconfigNode.GetStringValue("Site Robots TXT");
                if (!string.IsNullOrEmpty(configRobotTxt))
                {
                    robotsTxtContent = configRobotTxt;

                    bool hasSitemapDirective = configRobotTxt.IndexOf("sitemap:", StringComparison.OrdinalIgnoreCase) >= 0;
                    bool hasSitemapPath = configRobotTxt.IndexOf("/sitemap-xml", StringComparison.OrdinalIgnoreCase) >= 0;

                    if (!hasSitemapDirective || !hasSitemapPath)
                    {
                        robotsTxtContent += Environment.NewLine + Environment.NewLine + sitemapLine;
                    }
                }
            }

            context.Response.ContentType = "text/plain";
            context.Response.Write(robotsTxtContent);
            // Ends the response here so nothing else is written for this request.
            context.Response.End();
        }

        /// <summary>
        /// Writes the XML sitemap response: the XML declaration, the <c>urlset</c>
        /// element and the url entries produced by <c>XmlSitemap.GetCacheXml</c>.
        /// Does nothing when there is no context site or database.
        /// </summary>
        /// <param name="context">The HTTP context of the current request.</param>
        private static void ProcessSitemap(HttpContext context)
        {
            // Without a site or database no sitemap can be built; leave the request alone
            // rather than failing with a null reference.
            if (Sitecore.Context.Site == null || Sitecore.Context.Database == null)
            {
                return;
            }

            // Some site specific logic for multisite: sites listed here get one set of
            // url entries per language, all other sites are treated as monolingual.
            List<string> languages = null;
            if (string.Equals(Sitecore.Context.Site.Name, "myBelgiumWebsite", StringComparison.Ordinal))
            {
                languages = new List<string> { "nl-BE", "fr-BE" };
            }

            // Build the entries before anything is written, so that a failure while
            // generating them does not leave a half-written XML document behind.
            string urlEntries = XmlSitemap.GetCacheXml(languages);

            context.Response.ContentType = "text/xml";
            context.Response.Write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + Environment.NewLine);
            context.Response.Write("<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd\">" + Environment.NewLine);
            context.Response.Write(urlEntries);
            context.Response.Write(Environment.NewLine + "</urlset>");
            context.Response.End();
        }

        /// <summary>
        /// Finds the configuration item of the context site: the first sibling of the
        /// site's start item (including the start item itself) whose template is named
        /// "Site Config".
        /// </summary>
        /// <returns>
        /// The configuration item, or <c>null</c> when there is no context site or
        /// database, the start item cannot be loaded, the start item has no parent,
        /// or no matching item exists.
        /// </returns>
        public static Item GetConfigNode()
        {
            Sitecore.Sites.SiteContext site = Sitecore.Context.Site;
            if (site == null || string.IsNullOrEmpty(site.StartPath))
            {
                return null;
            }

            Sitecore.Data.Database db = Sitecore.Context.Database;
            if (db == null)
            {
                return null;
            }

            Item start = db.GetItem(site.StartPath);
            if (start == null || start.Parent == null)
            {
                return null;
            }

            return start.Parent.Children.FirstOrDefault(item => item.TemplateName == "Site Config");
        }
    }
}

XmlSitemap.cs

using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Web;
using Sitecore;
using Sitecore.Caching;
using Sitecore.Data;
using Sitecore.Data.Items;
using Sitecore.Data.Managers;
using Sitecore.Globalization;
using Sitecore.Links;

namespace SeoProcessor
{
    /// <summary>
    /// Generates the url entries of the XML sitemap for the context site and keeps the
    /// generated text in a custom Sitecore cache that is cleared after every publish.
    /// </summary>
    public static class XmlSitemap
    {
        // Custom cache for the generated url entries, one entry per cache key.
        static readonly Cache sitemapCache = new Sitecore.Caching.Cache("mirabeau-xml-sitemap", StringUtil.ParseSizeString("2MB"));

        static XmlSitemap()
        {
            // Flush the cache after a publish.
            Sitecore.Events.Event.Subscribe("publish:end", SitemapCache_OnPublishEnd);
            Sitecore.Events.Event.Subscribe("publish:end:remote", SitemapCache_OnPublishEnd);
        }

        /// <summary>Clears the sitemap cache when a publish has ended.</summary>
        static void SitemapCache_OnPublishEnd(object sender, System.EventArgs eventArgs)
        {
            sitemapCache.Clear();
        }

        /// <summary>
        /// Gets the url entries from the cache, generating and caching them when the
        /// cache has no (non-empty) value for the current site, protocol, host,
        /// database and language list.
        /// </summary>
        /// <param name="languagelist">
        /// The language names to generate entries for; pass <c>null</c> (or an empty
        /// list) for a monolingual site.
        /// </param>
        /// <returns>The url entries as XML text; empty when nothing could be generated.</returns>
        public static string GetCacheXml(List<string> languagelist)
        {
            string protocol = HttpContext.Current.Request.IsSecureConnection ? "-s-" : "-";

            // The key contains the host and also the database, because of the
            // difference between master and web. The host is lower-cased with the
            // invariant culture so the key does not depend on the thread culture.
            string cachekey = Context.Site.Name + protocol + HttpContext.Current.Request.Url.Host.ToLowerInvariant() + "-" + Sitecore.Context.Database.Name;
            if (languagelist != null)
            {
                cachekey += "-" + string.Concat(languagelist);
            }

            string sitemap = (string) sitemapCache.GetValue(cachekey);
            if (!string.IsNullOrEmpty(sitemap))
            {
                return sitemap;
            }

            if (languagelist != null && languagelist.Count > 0)
            {
                sitemap = GetXml(languagelist);
            }
            else
            {
                sitemap = GetXml();
            }

            // An empty result is treated as a cache miss above, so there is no point in storing it.
            if (!string.IsNullOrEmpty(sitemap))
            {
                sitemapCache.Add(cachekey, sitemap);
            }

            return sitemap;
        }

        /// <summary>
        /// Generates the url entries for a single-language site, using the default
        /// url options with the server url always included.
        /// </summary>
        /// <returns>The url entries, or an empty string when the home item cannot be loaded.</returns>
        private static string GetXml()
        {
            Item homeItem = GetContextHomeItem();
            if (homeItem == null)
            {
                return string.Empty;
            }

            UrlOptions options = global::Sitecore.Links.LinkManager.GetDefaultUrlOptions();
            options.AlwaysIncludeServerUrl = true;
            return CreateSiteMapUrls(CollectPages(homeItem), options);
        }

        /// <summary>
        /// Generates the url entries for every language in <paramref name="languagelist"/>,
        /// with the language always embedded in the urls.
        /// </summary>
        /// <param name="languagelist">The language names, for example "nl-BE".</param>
        /// <returns>The concatenated url entries of all languages that could be parsed.</returns>
        private static string GetXml(List<string> languagelist)
        {
            StringBuilder sitemapLinks = new StringBuilder();

            foreach (string languageName in languagelist)
            {
                Language language;
                if (!Language.TryParse(languageName, out language))
                {
                    // Not a valid language name: skip it instead of generating entries
                    // in whatever language happens to be current.
                    continue;
                }

                // Switch the language for this block only. The switcher restores the
                // previous language when disposed, so the request's own context
                // language is left as it was.
                using (new LanguageSwitcher(language))
                {
                    Item homeItem = GetContextHomeItem();
                    if (homeItem == null)
                    {
                        continue;
                    }

                    UrlOptions options = global::Sitecore.Links.LinkManager.GetDefaultUrlOptions();
                    options.AlwaysIncludeServerUrl = true;
                    options.LanguageEmbedding = LanguageEmbedding.Always;
                    options.Language = language;
                    // NOTE(review): kept from the original; the result of this call is
                    // not used here - confirm whether it is still needed.
                    options.EmbedLanguage(LanguageManager.GetLanguage(languageName));

                    sitemapLinks.Append(CreateSiteMapUrls(CollectPages(homeItem), options));
                }
            }

            return sitemapLinks.ToString();
        }

        /// <summary>Returns all descendants of the home item followed by the home item itself.</summary>
        private static List<Item> CollectPages(Item homeItem)
        {
            List<Item> pages = homeItem.Axes.GetDescendants().ToList();
            pages.Add(homeItem);
            return pages;
        }

        /// <summary>
        /// Creates one url entry per item whose "Meta Robots" drop link resolves to a
        /// "Meta Content" value starting with "index" (in any casing).
        /// </summary>
        /// <param name="detailList">The candidate items.</param>
        /// <param name="urlOptions">The options used to build each item url.</param>
        /// <returns>The url entries, one per line.</returns>
        private static string CreateSiteMapUrls(List<Item> detailList, UrlOptions urlOptions)
        {
            StringBuilder returnString = new StringBuilder();

            const string defaultpagechange = "daily";

            // Sitecore fields: each page must contain these fields.
            const string xmlSitemapPriority = "XML Sitemap Priority";
            const string xmlSitemapChangeFreq = "XML Sitemap Change Frequency";

            foreach (Item item in detailList)
            {
                // Ordinal, case-insensitive comparison: upper-casing with the thread
                // culture does not turn "index" into "INDEX" under every culture.
                string robots = item.GetDropLinkValue("Meta Robots", "Meta Content");
                if (robots == null || !robots.StartsWith("INDEX", System.StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                string url = LinkManager.GetItemUrl(item, urlOptions);
                if (url.IndexOf("/custombla/", System.StringComparison.OrdinalIgnoreCase) >= 0)
                {
                    // Optional: some custom logic to hide your site specific pages.
                    continue;
                }

                string prio = item.GetStringValue(xmlSitemapPriority);
                string changefreq = item.GetStringValue(xmlSitemapChangeFreq);
                if (string.IsNullOrEmpty(changefreq))
                {
                    changefreq = defaultpagechange;
                }

                // Values are written into XML, so characters such as '&' must be escaped.
                string escapedUrl = System.Security.SecurityElement.Escape(url);
                string escapedChangefreq = System.Security.SecurityElement.Escape(changefreq);

                // Both variants end with a line break so every entry is on its own line.
                if (string.IsNullOrEmpty(prio))
                {
                    returnString.AppendFormat("<url><loc>{0}</loc><changefreq>{1}</changefreq></url>\n", escapedUrl, escapedChangefreq);
                }
                else
                {
                    returnString.AppendFormat("<url><loc>{0}</loc><changefreq>{1}</changefreq><priority>{2}</priority></url>\n", escapedUrl, escapedChangefreq, System.Security.SecurityElement.Escape(prio));
                }
            }

            return returnString.ToString();
        }

        /// <summary>
        /// Gets the start (home) item of the context site from the context database.
        /// </summary>
        /// <param name="item">
        /// Not used; it only allows the method to be called as an extension on any
        /// item, including a <c>null</c> one.
        /// </param>
        /// <returns>The home item, or <c>null</c> when there is no context site or database.</returns>
        public static Item GetHomeItem(this Item item)
        {
            return GetContextHomeItem();
        }

        /// <summary>Loads the start item of the context site; null when site or database is missing.</summary>
        private static Item GetContextHomeItem()
        {
            global::Sitecore.Sites.SiteContext site = global::Sitecore.Context.Site;
            if (site == null)
            {
                return null;
            }

            global::Sitecore.Data.Database db = global::Sitecore.Context.Database;
            if (db == null)
            {
                return null;
            }

            return db.GetItem(site.StartPath);
        }
    }
}

SitecoreItemExtensions.cs

using Sitecore.Data.Fields;
using Sitecore.Data.Items;

namespace SeoProcessor
{
    /// <summary>
    /// Null-safe helpers for reading field values from a Sitecore item.
    /// </summary>
    public static class SitecoreItemExtensions
    {
        /// <summary>
        /// Gets the value of a field as a string.
        /// </summary>
        /// <param name="item">The item to read from; may be <c>null</c>.</param>
        /// <param name="fieldName">The name of the field; may be <c>null</c>.</param>
        /// <returns>
        /// The field value, or an empty string when the item, the field name, the
        /// field or its value is missing.
        /// </returns>
        public static string GetStringValue(this Item item, string fieldName)
        {
            if (item == null || fieldName == null)
            {
                return string.Empty;
            }

            // Look the field up once instead of once per check.
            Field field = item.Fields[fieldName];
            if (field == null)
            {
                return string.Empty;
            }

            string value = field.Value;
            return string.IsNullOrEmpty(value) ? string.Empty : value;
        }

        /// <summary>
        /// Follows a link field to its target item and gets the value of a field on
        /// that target item.
        /// </summary>
        /// <param name="item">The item that holds the link field; may be <c>null</c>.</param>
        /// <param name="fieldName">The name of the link field on <paramref name="item"/>.</param>
        /// <param name="droplinkFieldName">The name of the field to read on the target item.</param>
        /// <returns>
        /// The value of the target item's field, or an empty string when the item, the
        /// link field, the target item or the target field is missing.
        /// </returns>
        public static string GetDropLinkValue(this Item item, string fieldName, string droplinkFieldName)
        {
            if (item == null || fieldName == null || droplinkFieldName == null)
            {
                return string.Empty;
            }

            Field field = item.Fields[fieldName];
            if (field == null)
            {
                return string.Empty;
            }

            InternalLinkField linkField = field;

            // Resolve the target once and reuse it.
            Item target = linkField.TargetItem;
            if (target == null)
            {
                return string.Empty;
            }

            // The target item does not necessarily have the requested field.
            Field targetField = target.Fields[droplinkFieldName];
            if (targetField == null)
            {
                return string.Empty;
            }

            return targetField.Value ?? string.Empty;
        }
    }
}

And the config:
<?xml version="1.0" encoding="utf-8" ?>
<!-- Sitecore patch file for the SEO processor (robots.txt and sitemap-xml). -->
<configuration xmlns:patch="http://www.sitecore.net/xmlconfig/">
  <sitecore>
    <pipelines>
      <preprocessRequest>
        <!-- Allowed extension list including "txt"; presumably needed so that robots.txt requests are not filtered out (verify against your Sitecore version). -->
        <processor type="Sitecore.Pipelines.PreprocessRequest.FilterUrlExtensions, Sitecore.Kernel">
          <param desc="Allowed extensions (comma separated)">aspx, ashx, asmx, txt</param>
        </processor>
      </preprocessRequest>
      <httpRequestBegin>
        <!-- Registers the SEO processor before the UserResolver processor. -->
        <processor type="SeoProcessor.SeoProcessor, SeoProcessor" patch:before="processor[@type='Sitecore.Pipelines.HttpRequest.UserResolver, Sitecore.Kernel']"/>
      </httpRequestBegin>
    </pipelines>
  </sitecore>
</configuration>