I am splitting files into pages and extracting the text of each page using GroupDocs.Parser. The problem is that when I try to do the same with a .msg file, I get the error “Unsupported file type”. Below is my code:
using (Merger merger = new Merger(mStream))
{
    // Buffer that receives the document produced by the merger
    using (MemoryStream singlePageBuffer = new MemoryStream())
    {
        merger.ExtractPages(extractOptions);
        merger.Save(singlePageBuffer);

        // Rewind the buffer so reading starts at the first byte
        singlePageBuffer.Position = 0;

        // Announce the text-extraction step for this page
        WriteLog.WriteToLog("Extracting text using GD ...");
        WriteLog.WriteToLog("Page " + pageIndex + " extracting");

        string pageText;
        try
        {
            pageText = ExtractTextAll(singlePageBuffer, false).Trim();
        }
        catch (Exception ex)
        {
            // Any failure is logged and the page is treated as having no text
            WriteLog.WriteToLog($"Error processing page {pageIndex}: {ex.Message}");
            pageText = string.Empty;
        }

        // Accumulate the page text and store it in the database for this page
        strTextData.Append(pageText);
        InsertSinglePage.InsertPageNum(pageIndex, pageText, fileVersionId);
    }
}
/// <summary>
/// Reads all text from the document held in <paramref name="stream"/> using GroupDocs.Parser.
/// </summary>
/// <param name="stream">Stream containing the document; it is rewound to its start before parsing.</param>
/// <param name="formatted">Not consulted by this method at present; kept so existing callers keep compiling.</param>
/// <returns>
/// The extracted text, or an empty string when the parser returns no text reader
/// (so callers can safely call string methods such as <c>Trim()</c> on the result).
/// </returns>
private string ExtractTextAll(MemoryStream stream, bool formatted)
{
    // Reset the stream position to the beginning
    stream.Seek(0, SeekOrigin.Begin);

    // Both the parser and the reader it hands back are disposable; release them together.
    using (var parser = new GroupDocs.Parser.Parser(stream))
    using (var text = parser.GetText(new GroupDocs.Parser.Options.TextOptions(true)))
    {
        // GetText may yield null; return an empty string rather than null in that case.
        return text?.ReadToEnd() ?? string.Empty;
    }
}
Is there a workaround for this?