Recently I had a chance to work on an interesting project where we needed not only to consume data from a non-Sitecore application through an API, but also to make it look as though the data lives in Sitecore. This meant that URLs had to be consistent with the rest of the Sitecore solution and that search had to return both Sitecore and non-Sitecore results.
The solution that I came up with includes a custom Solr (4.3.1) index for non-Sitecore data, two Solr cores that serve aggregate results (one combining the non-Sitecore core with the master core, the other combining the non-Sitecore core with the web core), a custom Sitecore Multilist with Search field that accepts an _indexname parameter, a custom crawler and a couple of other components.
The external data that needed to be indexed lived in a system called netFORUM. netFORUM has a very nice API, but the focus of this post series is on the Sitecore solution, so I'm going to omit everything related to the actual data retrieval. Let's just say there is a layer in the Sitecore solution that does all the work of pulling in the data. There is a class in the solution, called ExtendedProduct, that holds all external object properties.
Here are the classes that were created to enable external data indexing:
Two config patch files were added to add sitecore_netforum_entities, sitecore_netforum_entities_rebuild, sitecore_globalsearch_master_index and sitecore_globalsearch_web_index.
The solution that I came up with includes a custom Solr (4.3.1) index for non-Sitecore data, two Solr cores that serve aggregate results (one combining the non-Sitecore core with the master core, the other combining the non-Sitecore core with the web core), a custom Sitecore Multilist with Search field that accepts an _indexname parameter, a custom crawler and a couple of other components.
Part 1: Configuring Solr core for external data
First of all, I would like to thank Cameron Palmer for his post on indexing of external data (http://www.awareweb.com/awareblog/9-30-14-indexingsitecore7). That post was instrumental for this solution. The external data that needed to be indexed lived in a system called netFORUM. netFORUM has a very nice API, but the focus of this post series is on the Sitecore solution, so I'm going to omit everything related to the actual data retrieval. Let's just say there is a layer in the Sitecore solution that does all the work of pulling in the data. There is a class in the solution, called ExtendedProduct, that holds all external object properties.
Here are the classes that were created to enable external data indexing:
IndexableProductField.cs
/// <summary>
/// Exposes one reflected property of an arbitrary object as an index data field.
/// The field name is the property name, the field id is the lower-cased property
/// name, and the value is read from the wrapped object on demand.
/// </summary>
public class IndexableProductField : IIndexableDataField
{
    private readonly object _concreteObject;
    private readonly PropertyInfo _fieldInfo;

    /// <summary>
    /// Creates a field wrapper for a single property.
    /// </summary>
    /// <param name="concreteObject">Object instance the property value is read from.</param>
    /// <param name="fieldInfo">Reflection metadata of the property to expose.</param>
    /// <exception cref="ArgumentNullException"><paramref name="fieldInfo"/> is null.</exception>
    public IndexableProductField(object concreteObject, PropertyInfo fieldInfo)
    {
        // Fail at construction time instead of with a NullReferenceException
        // inside whichever getter happens to be called first.
        if (fieldInfo == null)
        {
            throw new ArgumentNullException("fieldInfo");
        }

        _concreteObject = concreteObject;
        _fieldInfo = fieldInfo;
    }

    /// <summary>Declared CLR type of the wrapped property.</summary>
    public Type FieldType
    {
        get { return _fieldInfo.PropertyType; }
    }

    /// <summary>
    /// Field identifier: the property name in lower case. The invariant culture is
    /// used so the identifier does not change with the current thread culture.
    /// </summary>
    public object Id
    {
        get { return _fieldInfo.Name.ToLowerInvariant(); }
    }

    /// <summary>Property name exactly as declared on the wrapped type.</summary>
    public string Name
    {
        get { return _fieldInfo.Name; }
    }

    /// <summary>Always "string", regardless of <see cref="FieldType"/>.</summary>
    public string TypeKey
    {
        get { return "string"; }
    }

    /// <summary>Current value of the property, read from the wrapped object via reflection.</summary>
    public object Value
    {
        get { return _fieldInfo.GetValue(_concreteObject); }
    }
}
IndexableProductEntity.cs
/// <summary>
/// Wraps an <see cref="ExtendedProduct"/> and the <see cref="UrlProductType"/> it belongs to
/// so that the product can be passed to the search index as an indexable that also supplies
/// the built-in fields.
/// </summary>
public class IndexableProductEntity : IIndexable, IIndexableBuiltinFields
{
    // Reported as the database of every indexed product and used when building its item URI.
    private const string _databaseName = "netforum";

    private readonly ExtendedProduct _entity;
    private readonly UrlProductType _productType;

    // Materialized list of field wrappers, one per public instance property of the product.
    private IEnumerable<IndexableProductField> _fields;

    public virtual IIndexFieldStorageValueFormatter IndexFieldStorageValueFormatter { get; set; }

    /// <summary>
    /// Creates the indexable and builds one field wrapper per public instance property of
    /// <paramref name="entity"/>.
    /// </summary>
    /// <param name="entity">Product to index.</param>
    /// <param name="productType">Product type used to build <see cref="FullPath"/> and <see cref="Parent"/>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="entity"/> is null.</exception>
    public IndexableProductEntity(ExtendedProduct entity, UrlProductType productType)
    {
        if (entity == null)
        {
            throw new ArgumentNullException("entity");
        }

        _entity = entity;
        _productType = productType;
        _fields = BuildFields();
    }

    /// <summary>Always "/".</summary>
    public virtual string AbsolutePath
    {
        get { return "/"; }
    }

    /// <summary>Culture of the current thread.</summary>
    public CultureInfo Culture
    {
        get { return CultureInfo.CurrentCulture; }
    }

    /// <summary>Field wrappers for the public instance properties of the product.</summary>
    public virtual IEnumerable<IIndexableDataField> Fields
    {
        get { return _fields; }
    }

    /// <summary>
    /// Returns the field whose id equals <paramref name="fieldId"/>, or null when there is none.
    /// </summary>
    public IIndexableDataField GetFieldById(object fieldId)
    {
        // Field ids are strings exposed as object; '==' on object operands compares
        // references and would practically never match, so compare by value.
        return _fields.FirstOrDefault(f => Equals(f.Id, fieldId));
    }

    /// <summary>
    /// Returns the field whose name matches <paramref name="fieldName"/> ignoring case,
    /// or null when there is none (including when <paramref name="fieldName"/> is null).
    /// </summary>
    public IIndexableDataField GetFieldByName(string fieldName)
    {
        return _fields.FirstOrDefault(f => string.Equals(f.Name, fieldName, StringComparison.OrdinalIgnoreCase));
    }

    /// <summary>Indexable id built from the product id.</summary>
    public IIndexableId Id
    {
        // NOTE(review): the published listing shows no generic type argument on IndexableId;
        // if the type is generic in the referenced Sitecore version, restore the argument
        // so that it matches the type of ExtendedProduct.Id.
        get { return new IndexableId(_entity.Id); }
    }

    /// <summary>Rebuilds the field wrappers from the product's public instance properties.</summary>
    public void LoadAllFields()
    {
        _fields = BuildFields();
    }

    /// <summary>
    /// Unique id built from an item URI made of the product id, the context language,
    /// the first version and the "netforum" database name.
    /// </summary>
    public IIndexableUniqueId UniqueId
    {
        get
        {
            var uri = new ItemUri(ID.Parse(_entity.Id).ToString(), Sitecore.Context.Language, Sitecore.Data.Version.First, _databaseName);

            // NOTE(review): as with IndexableId, a generic type argument may have been lost
            // from IndexableUniqueId when this listing was published; confirm before compiling.
            return new IndexableUniqueId(new SitecoreItemUniqueId(uri));
        }
    }

    /// <summary>Always "NetForumEntity".</summary>
    public virtual string DataSource
    {
        get { return "NetForumEntity"; }
    }

    /// <summary>Product type id of the wrapped product.</summary>
    public virtual Guid ProductTypeId
    {
        get { return _entity.ProductTypeId; }
    }

    /// <summary>Product code of the wrapped product.</summary>
    public virtual string ProductCode
    {
        get { return _entity.ProductCode; }
    }

    /// <summary>Product name of the wrapped product.</summary>
    public virtual string Name
    {
        get { return _entity.ProductName; }
    }

    object IIndexableBuiltinFields.Group
    {
        get { return _entity.Id; }
    }

    bool IIndexableBuiltinFields.IsClone
    {
        get { return false; }
    }

    int IIndexableBuiltinFields.Version
    {
        get { return 1; }
    }

    bool IIndexableBuiltinFields.IsLatestVersion
    {
        get { return true; }
        // The setter is required by the interface; assigned values are ignored.
        set { }
    }

    // The product type id doubles as the template id of the indexed document.
    object IIndexableBuiltinFields.TemplateId
    {
        get { return this.ProductTypeId; }
    }

    string IIndexableBuiltinFields.Language
    {
        get { return Sitecore.Context.Language.ToString(); }
    }

    string IIndexableBuiltinFields.Database
    {
        get { return _databaseName; }
    }

    // Product id in ShortID form, lower-cased with the invariant culture.
    string IIndexableBuiltinFields.ID
    {
        get { return ShortID.Parse(_entity.Id.ToString()).ToString().ToLowerInvariant(); }
    }

    public string CreatedBy
    {
        get { return "netForum"; }
    }

    public DateTime CreatedDate
    {
        get { return _entity.CatalogDate; }
    }

    public string DisplayName
    {
        get { return _entity.ProductName; }
    }

    /// <summary>
    /// Path built from the "ProductUrlFormat" setting (default "{0}/{1}/{2}/{3}") using the
    /// parent site item's full path, the product type name, the product code and a valid
    /// item name proposed from the product name.
    /// </summary>
    public string FullPath
    {
        get
        {
            var urlFormat = Settings.GetSetting("ProductUrlFormat", "{0}/{1}/{2}/{3}");
            return string.Format(urlFormat, _productType.ParentSiteItem.Paths.FullPath, _productType.Name, _entity.ProductCode, ItemUtil.ProposeValidItemName(_entity.ProductName));
        }
    }

    /// <summary>Id of the product type's parent site item.</summary>
    public object Parent
    {
        get { return _productType.ParentSiteItem.ID; }
    }

    /// <summary>Always an empty list.</summary>
    public IEnumerable Paths
    {
        // NOTE(review): the element type of this collection was lost when the listing was
        // published; restore it to match IIndexableBuiltinFields.Paths before compiling.
        get { return new List(); }
    }

    public string TemplateName
    {
        get { return _entity.ProductTypeCode; }
    }

    public string UpdatedBy
    {
        get { return "netForum"; }
    }

    public DateTime UpdatedDate
    {
        get { return _entity.CatalogDate; }
    }

    // Creates one field wrapper per public instance property of the product. The result is
    // materialized so that repeated enumeration does not repeat the reflection and allocate
    // a fresh set of wrappers each time.
    private List<IndexableProductField> BuildFields()
    {
        return _entity.GetType()
            .GetProperties(BindingFlags.Public
                | BindingFlags.Instance
                | BindingFlags.IgnoreCase)
            .Select(fi => new IndexableProductField(_entity, fi))
            .ToList();
    }
}
NetForumEntityCrawler.cs
/// <summary>
/// Flat crawler that feeds products obtained from <see cref="IExtendedProductService"/>
/// into the search index as <see cref="IndexableProductEntity"/> instances.
/// Only products whose product type is present in the configured product types are indexed.
/// </summary>
public class NetForumEntityCrawler : FlatDataCrawler<IndexableProductEntity>
{
    private readonly IExtendedProductService _productService;

    /// <summary>
    /// Creates the crawler and resolves the product service from the current dependency resolver.
    /// </summary>
    public NetForumEntityCrawler()
    {
        _productService = DependencyResolver.Current.GetService<IExtendedProductService>();
    }

    /// <summary>Creates the crawler with an explicitly supplied product service.</summary>
    /// <param name="productService">Service used to load products.</param>
    public NetForumEntityCrawler(IExtendedProductService productService)
    {
        _productService = productService;
    }

    /// <summary>
    /// Returns an indexable for every product belonging to one of the configured product
    /// types, or an empty list when no product types are configured.
    /// </summary>
    protected override IEnumerable<IndexableProductEntity> GetItemsToIndex()
    {
        var productTypes = UrlConfiguration.Instance(BusinessConstants.NetForumProducts.DatabaseName).ProductTypes;
        if (productTypes == null || !productTypes.Any())
        {
            return new List<IndexableProductEntity>();
        }

        var productTypeIds = productTypes.Select(pt => pt.ProductTypeId);
        return _productService.GetProductsByProductTypeIds(productTypeIds)
            .Select(p => new IndexableProductEntity(p, productTypes.FirstOrDefault(pt => pt.ProductTypeId == p.ProductTypeId)));
    }

    /// <summary>
    /// Loads the product identified by <paramref name="indexableUniqueId"/> and wraps it,
    /// or returns null when the product does not exist or its type is not configured.
    /// </summary>
    protected override IndexableProductEntity GetIndexable(IIndexableUniqueId indexableUniqueId)
    {
        var productType = UrlConfiguration.Instance(BusinessConstants.NetForumProducts.DatabaseName).ProductTypes;
        if (productType == null || !productType.Any())
        {
            return null;
        }

        var productTypeIds = productType.Select(pt => pt.ProductTypeId);
        var product = _productService.GetProduct(((Guid)indexableUniqueId.Value));
        if (product == null || !productTypeIds.Contains(product.ProductTypeId))
        {
            return null;
        }

        return new IndexableProductEntity(product, productType.FirstOrDefault(pt => pt.ProductTypeId == product.ProductTypeId));
    }

    /// <summary>
    /// Same lookup as <see cref="GetIndexable"/>, but the product types are read from
    /// CustomLinkConfigurationUtilities. Returns null when nothing matches.
    /// </summary>
    protected override IndexableProductEntity GetIndexableAndCheckDeletes(IIndexableUniqueId indexableUniqueId)
    {
        if (CustomLinkConfigurationUtilities.ProductTypes == null || !CustomLinkConfigurationUtilities.ProductTypes.Any())
        {
            return null;
        }

        var product = _productService.GetProduct(((Guid)indexableUniqueId.Value));
        if (product == null || !CustomLinkConfigurationUtilities.ProductTypeIds.Contains(product.ProductTypeId))
        {
            return null;
        }

        return new IndexableProductEntity(product, CustomLinkConfigurationUtilities.ProductTypes.FirstOrDefault(pt => pt.ProductTypeId == product.ProductTypeId));
    }

    /// <summary>
    /// Returns the given id when it identifies an existing product of a configured type;
    /// otherwise returns an empty sequence (never null).
    /// </summary>
    protected override IEnumerable<IIndexableUniqueId> GetIndexablesToUpdateOnDelete(IIndexableUniqueId indexableUniqueId)
    {
        if (CustomLinkConfigurationUtilities.ProductTypeIds != null && CustomLinkConfigurationUtilities.ProductTypeIds.Any())
        {
            var product = _productService.GetProduct(((Guid)indexableUniqueId.Value));
            if (product != null && CustomLinkConfigurationUtilities.ProductTypeIds.Contains(product.ProductTypeId))
            {
                return new List<IIndexableUniqueId>() { indexableUniqueId };
            }
        }

        // An empty sequence instead of null, so callers can enumerate the result unconditionally.
        return Enumerable.Empty<IIndexableUniqueId>();
    }

    /// <summary>Always true.</summary>
    protected override bool IndexUpdateNeedDelete(IndexableProductEntity indexable)
    {
        //Set to false in SitecoreItemCrawler
        return true;
    }
}
Two config patch files were added to add sitecore_netforum_entities, sitecore_netforum_entities_rebuild, sitecore_globalsearch_master_index and sitecore_globalsearch_web_index.
Sitecore.ContentSearch.Solr.Index.NetForumEntities.config
<?xml version="1.0" encoding="utf-8" ?>
<!--
  Registers the sitecore_netforum_entities Solr index, which is populated by the custom
  NetForumEntityCrawler, and patches extra type matches and field mappings into the
  default Solr index configuration.
-->
<configuration xmlns:patch="http://www.sitecore.net/xmlconfig/">
  <sitecore>
    <contentSearch>
      <configuration type="Sitecore.ContentSearch.ContentSearchConfiguration, Sitecore.ContentSearch">
        <indexes hint="list:AddIndex">
          <!-- $(id) is used for both the index name and the Solr core name. -->
          <index id="sitecore_netforum_entities" type="Sitecore.ContentSearch.SolrProvider.SolrSearchIndex, Sitecore.ContentSearch.SolrProvider">
            <param desc="name">$(id)</param>
            <param desc="core">$(id)</param>
            <param desc="propertyStore" ref="contentSearch/indexConfigurations/databasePropertyStore" param1="$(id)" />
            <configuration ref="contentSearch/indexConfigurations/defaultSolrIndexConfiguration" />
            <strategies hint="list:AddStrategy">
              <!-- intervalAsyncNetForum is only referenced here; it has to be defined in another config file. -->
              <strategy ref="contentSearch/indexConfigurations/indexUpdateStrategies/intervalAsyncNetForum" />
            </strategies>
            <locations hint="list:AddCrawler">
              <crawler type="Custom.Business.ContentSearch.Indexing.Crawlers.NetForumEntityCrawler, Custom.Business">
              </crawler>
            </locations>
          </index>
        </indexes>
      </configuration>
      <indexConfigurations>
        <defaultSolrIndexConfiguration type="Sitecore.ContentSearch.SolrProvider.SolrIndexConfiguration, Sitecore.ContentSearch.SolrProvider">
          <fieldMap type="Sitecore.ContentSearch.SolrProvider.SolrFieldMap, Sitecore.ContentSearch.SolrProvider">
            <!-- Additional CLR types mapped to Solr string fields (single-valued _s, multi-valued _sm). -->
            <typeMatches hint="raw:AddTypeMatch">
              <typeMatch patch:after="*[@typeName='double']" typeName="decimal" type="System.Decimal" fieldNameFormat="{0}_s" settingType="Sitecore.ContentSearch.SolrProvider.SolrSearchFieldConfiguration, Sitecore.ContentSearch.SolrProvider" />
              <typeMatch patch:after="*[@typeName='double']" typeName="productType" type="Custom.DomainObjects.NetForum.Commerce.ProductType,Custom.DomainObjects" fieldNameFormat="{0}_s" settingType="Sitecore.ContentSearch.SolrProvider.SolrSearchFieldConfiguration, Sitecore.ContentSearch.SolrProvider" />
              <!--
                The generic argument is assembly-qualified, so it must be wrapped in its own pair
                of brackets; with a single pair the comma is read as a separator between two
                generic arguments and the type cannot be resolved.
              -->
              <typeMatch patch:after="*[@typeName='double']" typeName="priceCollection" type="System.Collections.Generic.List`1[[Custom.DomainObjects.NetForum.Commerce.Price, Custom.DomainObjects]]" fieldNameFormat="{0}_sm" multiValued="true" settingType="Sitecore.ContentSearch.SolrProvider.SolrSearchFieldConfiguration, Sitecore.ContentSearch.SolrProvider" />
              <typeMatch patch:after="*[@typeName='double']" typeName="price" type="Custom.DomainObjects.NetForum.Commerce.Price,Custom.DomainObjects" fieldNameFormat="{0}_s" settingType="Sitecore.ContentSearch.SolrProvider.SolrSearchFieldConfiguration, Sitecore.ContentSearch.SolrProvider" />
            </typeMatches>
            <!-- Product fields that are returned as strings. -->
            <fieldNames hint="raw:AddFieldByFieldName">
              <field fieldName="ProductType" returnType="string" />
              <field fieldName="ThumbnailUrl" returnType="string" />
              <field fieldName="YearPublished" returnType="string" />
            </fieldNames>
          </fieldMap>
        </defaultSolrIndexConfiguration>
      </indexConfigurations>
    </contentSearch>
  </sitecore>
</configuration>
Sitecore.ContentSearch.Solr.Index.GlobalSearch.config
<?xml version="1.0" encoding="utf-8" ?>
<!--
  Registers the two global-search Solr indexes (master and web). Neither index declares
  a crawler, and both use the manual update strategy.
-->
<configuration xmlns:patch="http://www.sitecore.net/xmlconfig/">
  <sitecore>
    <contentSearch>
      <configuration type="Sitecore.ContentSearch.ContentSearchConfiguration, Sitecore.ContentSearch">

        <!-- Global search index paired with the master content. -->
        <indexes hint="list:AddIndex">
          <index id="sitecore_globalsearch_master_index" type="Sitecore.ContentSearch.SolrProvider.SolrSearchIndex, Sitecore.ContentSearch.SolrProvider">
            <param desc="name">$(id)</param>
            <param desc="core">$(id)</param>
            <param desc="propertyStore" ref="contentSearch/indexConfigurations/databasePropertyStore" param1="$(id)" />
            <configuration ref="contentSearch/indexConfigurations/defaultSolrIndexConfiguration" />
            <strategies hint="list:AddStrategy">
              <strategy ref="contentSearch/indexConfigurations/indexUpdateStrategies/manual" />
            </strategies>
          </index>
        </indexes>

        <!-- Global search index paired with the web content. -->
        <indexes hint="list:AddIndex">
          <index id="sitecore_globalsearch_web_index" type="Sitecore.ContentSearch.SolrProvider.SolrSearchIndex, Sitecore.ContentSearch.SolrProvider">
            <param desc="name">$(id)</param>
            <param desc="core">$(id)</param>
            <param desc="propertyStore" ref="contentSearch/indexConfigurations/databasePropertyStore" param1="$(id)" />
            <configuration ref="contentSearch/indexConfigurations/defaultSolrIndexConfiguration" />
            <strategies hint="list:AddStrategy">
              <strategy ref="contentSearch/indexConfigurations/indexUpdateStrategies/manual" />
            </strategies>
          </index>
        </indexes>

      </configuration>
    </contentSearch>
  </sitecore>
</configuration>
Solr Configurations
On the Solr side, the netFORUM core's schema.xml and solrconfig.xml files were the same as those of the other cores. However, the Global Search core had a couple of differences in solrconfig.xml.
In the select handler, I added a shard configuration that tricks Solr into thinking that it is searching across shards, while it merely combines two cores and produces aggregate results.
In the select handler, I added a shard configuration that tricks Solr into thinking that it is searching across shards, while it merely combines two cores and produces aggregate results.
<requestHandler name="/select" class="solr.SearchHandler">
  <!-- default values for query parameters can be specified, these
       will be overridden by parameters in the request
  -->
  <lst name="defaults">
    <str name="echoParams">explicit</str>
    <int name="rows">100</int>
    <str name="df">text</str>
    <bool name="terms">true</bool>
    <str name="spellcheck">true</str>
    <str name="spellcheck.collate">true</str>
    <str name="spellcheck.extendedResults">true</str>
    <!-- The two cores listed here are queried as shards and their results are combined. -->
    <str name="shards">localhost:8983/solr/sitecore_master_index,localhost:8983/solr/sitecore_netforum_entities</str>
    <str name="shards.qt">select</str>
  </lst>
  <arr name="last-components">
    <str>terms</str>
    <str>spellcheck</str>
  </arr>
</requestHandler>
<!-- default values for query parameters can be specified, these
will be overridden by parameters in the request
-->
<lst name="defaults">
<str name="echoParams">explicit</str>
<int name="rows">100</int>
<str name="df">text</str>
<bool name="terms">true</bool>
<str name="spellcheck">true</str>
<str name="spellcheck.collate">true</str>
<str name="spellcheck.extendedResults">true</str>
<str name="shards">localhost:8983/solr/sitecore_master_index,localhost:8983/solr/sitecore_netforum_entities</str>
<str name="shards.qt">select</str>
</lst>
<arr name="last-components">
<str>terms</str>
<str>spellcheck</str>
</arr>
</requestHandler>
No comments:
Post a Comment