I am using GroupDocs.Search to highlight phrases in documents (Word, Excel, PowerPoint, PDF), but the text is not highlighted in almost any of the files.
I am sharing my code and the document files for your reference.
// Copies the requested file into the "Search" working folder, indexes that folder,
// runs the query, and highlights the hits in the cached HTML pages of the file.
Program p = new Program();
var storagePath = p.GetStoragePath();
string basePath = storagePath + "\\Search";
// CreateDirectory does nothing when the folder already exists, so no Exists check is needed.
Directory.CreateDirectory(basePath);

var fileGuidWithStoragePath = Path.Combine(storagePath, request.GUID);
var fileGuidWithSearchPath = Path.Combine(basePath, request.GUID);
// Overwrite any stale copy left by a previous request in a single call.
System.IO.File.Copy(fileGuidWithStoragePath, fileGuidWithSearchPath, overwrite: true);

string viewerCacheFolderPath = basePath + @"\Cache";
string indexFolder = basePath + @"\Index";
string documentsFolder = basePath;
string query = request.TextData;

// Creating an index in the specified folder
Index index = new Index(indexFolder);
// Indexing documents from the specified folder
// NOTE(review): documentsFolder is also the parent of the Cache and Index folders,
// so those may be picked up by the indexer as well - confirm this is intended.
index.Add(documentsFolder);

// Search in index
// NOTE(review): a multi-word phrase presumably has to be wrapped in double quotes
// to be treated as a phrase query - confirm against the GroupDocs.Search query syntax.
GroupDocs.Search.Options.SearchOptions searchOptions = new GroupDocs.Search.Options.SearchOptions();
searchOptions.UseCaseSensitiveSearch = false; // Adjust based on your requirement
SearchResult result = index.Search(query, searchOptions);

// Nothing found at all: report the failure to the caller.
int documentCount = result.DocumentCount;
if (documentCount == 0)
{
    return new SPResponse() { ReturnStatus = "-1" };
}

// Pick the hit that belongs to the file copied above; other files in the folder may match too.
FoundDocument foundDocument = null;
for (int i = 0; i < documentCount; i++)
{
    FoundDocument document = result.GetFoundDocument(i);
    if (document.DocumentInfo.FilePath.Equals(fileGuidWithSearchPath, StringComparison.OrdinalIgnoreCase))
    {
        foundDocument = document;
        break;
    }
}

// The query matched other documents but not the requested one. Without this guard
// the code below dereferences null; report it the same way as "no results".
if (foundDocument == null)
{
    return new SPResponse() { ReturnStatus = "-1" };
}

var documentGuid = foundDocument.DocumentInfo.FilePath;
var fileFolderName = Path.GetFileName(documentGuid).Replace(".", "_");
string cachePath = Path.Combine(p.GetStoragePath(), "Search");
cachePath = Path.Combine(cachePath, "Cache");
string fileCacheSubFolder = Path.Combine(cachePath, fileFolderName);
IViewerCache cache = new FileViewerCache(cachePath, fileCacheSubFolder);

// Generating HTML
LoadDocumentEntity loadDocumentEntity;
using (HtmlViewer htmlViewer = new HtmlViewer(documentGuid, cache, GetLoadOptions("")))
{
    loadDocumentEntity = GetLoadDocumentEntity(true, documentGuid, fileCacheSubFolder, htmlViewer, viewerCacheFolderPath);
}

IndexedFileInfo fileInfo = new IndexedFileInfo(viewerCacheFolderPath, foundDocument.DocumentInfo.FilePath);
HighlightService highlightService = new HighlightService(fileInfo, null, cache);
// Highlighting in HTML
highlightService.Highlight(foundDocument, index.Dictionaries.Alphabet, true);
My highlight function:
// For every cached page: read its HTML, let HtmlHighlighter mark the found terms,
// make sure the CSS rule for the highlight class is present, and write the page back.
foreach (var page in _pages)
{
    string pageFilePath = string.Empty;
    if (isHtmlMode)
    {
        pageFilePath = _fileInfo.GetHtmlPageFilePath(page.Number);
    }
    // NOTE: when isHtmlMode is false the path stays empty and ReadAllText throws.
    var text = File.ReadAllText(pageFilePath);
    var result = HtmlHighlighter.Handle(
        text,
        false,
        alphabet,
        foundDocument.Terms,
        foundDocument.TermSequences);

    // Skip injection when the rule is already there (page highlighted by an earlier run).
    if (result.IndexOf(HighlightStyle, StringComparison.Ordinal) < 0)
    {
        int index = result.IndexOf(Key, StringComparison.OrdinalIgnoreCase);
        if (index >= 0)
        {
            // Reuse the first bare <style> element of the page.
            result = result.Insert(index + Key.Length, HighlightStyle);
        }
        else
        {
            // No bare <style> tag (none at all, or one with attributes): add a
            // dedicated element so the highlighted terms still get their colour.
            string styleElement = Key + HighlightStyle + "</style>";
            int headEnd = result.IndexOf("</head>", StringComparison.OrdinalIgnoreCase);
            result = headEnd >= 0
                ? result.Insert(headEnd, styleElement)
                : result + styleElement;
        }
    }

    File.WriteAllText(pageFilePath, result);
}
int index = result.IndexOf(Key); // here IndexOf returns -1 in most cases, i.e. Key was not found in the page HTML
if (documentCount == 0)// the document count is 0 when we search for a phrase
My complete code in the highlight service:
/// <summary>
/// Highlights found terms in the cached HTML pages of a single indexed file.
/// </summary>
internal class HighlightService
{
    // Opening tag after which the highlight CSS rule is injected.
    private const string Key = "<style>";
    // CSS rule for the highlighted-term class.
    private const string HighlightStyle = @".highlighted-term { background-color:#ADFF2F; } ";
    private const string StyleEndTag = "</style>";
    private const string HeadEndTag = "</head>";

    private readonly IndexedFileInfo _fileInfo;
    private readonly string _password;
    private readonly IList<Page> _pages;

    /// <summary>
    /// Opens the file in an <see cref="HtmlViewer"/>, collects its pages and
    /// creates the cache entry for each page.
    /// </summary>
    /// <param name="fileInfo">Describes the indexed file and its cached page paths.</param>
    /// <param name="password">Password passed to the viewer; may be null.</param>
    /// <param name="cache">Viewer cache passed to the viewer.</param>
    public HighlightService(
        IndexedFileInfo fileInfo,
        string password, IViewerCache cache)
    {
        _fileInfo = fileInfo;
        _password = password;
        using (var htmlViewer = new HtmlViewer(_fileInfo, cache, _password))
        {
            _pages = htmlViewer.GetPages();
            foreach (var page in _pages)
            {
                htmlViewer.CreateCacheForPage(page.Number);
            }
        }
    }

    /// <summary>
    /// Rewrites every cached page file with the found terms highlighted and the
    /// highlight CSS rule present.
    /// </summary>
    /// <param name="foundDocument">Search hit whose terms and term sequences are highlighted.</param>
    /// <param name="alphabet">Alphabet passed through to <c>HtmlHighlighter.Handle</c>.</param>
    /// <param name="isHtmlMode">
    /// When true the page path is taken from <c>GetHtmlPageFilePath</c>. When false the
    /// path stays empty and reading the page throws.
    /// </param>
    public void Highlight(FoundDocument foundDocument, Alphabet alphabet, bool isHtmlMode = true)
    {
        foreach (var page in _pages)
        {
            string pageFilePath = string.Empty;
            if (isHtmlMode)
            {
                pageFilePath = _fileInfo.GetHtmlPageFilePath(page.Number);
            }

            var text = File.ReadAllText(pageFilePath);
            var highlighted = HtmlHighlighter.Handle(
                text,
                false,
                alphabet,
                foundDocument.Terms,
                foundDocument.TermSequences);

            File.WriteAllText(pageFilePath, InjectHighlightStyle(highlighted));
        }
    }

    /// <summary>
    /// Returns <paramref name="html"/> with the highlight CSS rule added.
    /// </summary>
    /// <remarks>
    /// The rule goes right after the first bare <c>&lt;style&gt;</c> tag. If the page has
    /// none (no style element, or only ones with attributes), a complete style element
    /// is inserted before <c>&lt;/head&gt;</c>, or appended when there is no head.
    /// Previously such pages got no rule, so highlighted terms had no visible styling.
    /// </remarks>
    private static string InjectHighlightStyle(string html)
    {
        // Already injected by an earlier run over the same cached page.
        if (html.IndexOf(HighlightStyle, StringComparison.Ordinal) >= 0)
        {
            return html;
        }

        int styleIndex = html.IndexOf(Key, StringComparison.OrdinalIgnoreCase);
        if (styleIndex >= 0)
        {
            return html.Insert(styleIndex + Key.Length, HighlightStyle);
        }

        string styleElement = Key + HighlightStyle + StyleEndTag;
        int headEnd = html.IndexOf(HeadEndTag, StringComparison.OrdinalIgnoreCase);
        return headEnd >= 0
            ? html.Insert(headEnd, styleElement)
            : html + styleElement;
    }
}
Not working online on the GroupDocs website either (1.8 MB)
Working fine online, but not working with my code (11.7 KB)
I am searching for “Proposal” as the search term.