I cannot get Excel 2003 XML format to convert to CSV. Is this format supported?
Could you please share the problematic file and the sample code you are using for the conversion? We’ll then investigate this scenario at our end.
See below. I have the Code, Input File, and the Output File. The file is viewable in Excel.
**** HERE IS THE CODE ****
using GroupDocs.Conversion.FileTypes;
using GroupDocs.Conversion.Options.Convert;
using Microsoft.AspNetCore.Http;
using Microsoft.AspNetCore.Mvc;
using Microsoft.Azure.WebJobs;
using Microsoft.Azure.WebJobs.Extensions.Http;
using Microsoft.Extensions.Logging;
using System;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
namespace ExcelFunctions
{
public static class ExcelToDelimited
{
public static Stream GetSourceFile(HttpRequest req)
{
MemoryStream memoryStream = new MemoryStream();
req.Body.CopyTo(memoryStream);
memoryStream.Position = 0;
return memoryStream;
}
[FunctionName("ExcelToDelimited")]
public static async Task<IActionResult> Run(
[HttpTrigger(AuthorizationLevel.Function, "post", Route = null)] HttpRequest req,
ILogger log)
{
log.LogInformation("C# HTTP trigger function processed a request.");
// Get the file name and extension from the header.
string headerData = new(req.Headers["FileName"].ToString());
string fileName = string.Empty;
string fileExtension = string.Empty;
// Get the delimiter from the header.
string delimiter = req.Headers["Delimiter"].ToString();
if (string.IsNullOrEmpty(delimiter))
{
delimiter = ",";
}
try
{
// Disect the file name and extension.
if (headerData.LastIndexOf('.') != -1)
{
fileName = headerData[..headerData.LastIndexOf('.')];
fileExtension = headerData[(headerData.LastIndexOf('.') + 1)..].ToLower();
}
// Initially convert to an xlsx file.
MemoryStream sourceFile = new();
using var converter = new GroupDocs.Conversion.Converter(() => GetSourceFile(req));
SpreadsheetConvertOptions options = new();
options.Format = SpreadsheetFileType.Tsv;
converter.Convert(() => sourceFile, options);
sourceFile.Close();
// Reopen the converted xlsx file.
byte[] buffer = sourceFile.ToArray();
MemoryStream delimitedFile = new(buffer);
using (StreamReader reader = new StreamReader(delimitedFile))
{
string content = reader.ReadToEnd();
reader.Close();
content = Regex.Replace(content, "\t", delimiter);
buffer = Encoding.ASCII.GetBytes(content);
}
// Return the delimited file.
FileContentResult resultFile = new(buffer, "text/csv");
resultFile.FileDownloadName = fileName + ".csv";
return resultFile;
}
catch (Exception ex)
{
return new BadRequestObjectResult(ex);
}
}
}
}
**** HERE IS THE INPUT ****
<?xml version="1.0"?>
<?mso-application progid="Excel.Sheet"?>
<Workbook xmlns="urn:schemas-microsoft-com:office:spreadsheet"
xmlns:o="urn:schemas-microsoft-com:office:office"
xmlns:x="urn:schemas-microsoft-com:office:excel"
xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet"
xmlns:html="http://www.w3.org/TR/REC-html40">
<DocumentProperties xmlns="urn:schemas-microsoft-com:office:office">
<Author>Chris Elrod</Author>
<LastAuthor>Chris Elrod</LastAuthor>
<Created>2022-02-04T18:06:39Z</Created>
<LastSaved>2022-02-04T18:07:59Z</LastSaved>
<Version>16.00</Version>
</DocumentProperties>
<OfficeDocumentSettings xmlns="urn:schemas-microsoft-com:office:office">
<AllowPNG/>
</OfficeDocumentSettings>
<ExcelWorkbook xmlns="urn:schemas-microsoft-com:office:excel">
<WindowHeight>11145</WindowHeight>
<WindowWidth>21600</WindowWidth>
<WindowTopX>2295</WindowTopX>
<WindowTopY>2295</WindowTopY>
<ProtectStructure>False</ProtectStructure>
<ProtectWindows>False</ProtectWindows>
</ExcelWorkbook>
<Styles>
<Style ss:ID="Default" ss:Name="Normal">
<Alignment ss:Vertical="Bottom"/>
<Borders/>
<Font ss:FontName="Calibri" x:Family="Swiss" ss:Size="11" ss:Color="#000000"/>
<Interior/>
<NumberFormat/>
<Protection/>
</Style>
<Style ss:ID="s16">
<NumberFormat ss:Format="Short Date"/>
</Style>
</Styles>
<Worksheet ss:Name="Sheet1">
<Table ss:ExpandedColumnCount="3" ss:ExpandedRowCount="4" x:FullColumns="1"
x:FullRows="1" ss:DefaultColumnWidth="51" ss:DefaultRowHeight="14.25">
<Column ss:Index="2" ss:Width="54.75"/>
<Row>
<Cell><Data ss:Type="String">Name</Data></Cell>
<Cell><Data ss:Type="String">Date</Data></Cell>
<Cell><Data ss:Type="String">Location</Data></Cell>
</Row>
<Row>
<Cell><Data ss:Type="String">Joe Smith</Data></Cell>
<Cell ss:StyleID="s16"><Data ss:Type="DateTime">2022-02-03T00:00:00.000</Data></Cell>
<Cell><Data ss:Type="String">Greenville</Data></Cell>
</Row>
<Row>
<Cell><Data ss:Type="String">John Doe</Data></Cell>
<Cell ss:StyleID="s16"><Data ss:Type="DateTime">2021-03-01T00:00:00.000</Data></Cell>
<Cell><Data ss:Type="String">Phoenix</Data></Cell>
</Row>
<Row>
<Cell><Data ss:Type="String">Jane Robinson</Data></Cell>
<Cell ss:StyleID="s16"><Data ss:Type="DateTime">2020-04-14T00:00:00.000</Data></Cell>
<Cell><Data ss:Type="String">Houston</Data></Cell>
</Row>
</Table>
<WorksheetOptions xmlns="urn:schemas-microsoft-com:office:excel">
<PageSetup>
<Header x:Margin="0.3"/>
<Footer x:Margin="0.3"/>
<PageMargins x:Bottom="0.75" x:Left="0.7" x:Right="0.7" x:Top="0.75"/>
</PageSetup>
<Selected/>
<Panes>
<Pane>
<Number>3</Number>
<ActiveRow>3</ActiveRow>
<ActiveCol>2</ActiveCol>
</Pane>
</Panes>
<ProtectObjects>False</ProtectObjects>
<ProtectScenarios>False</ProtectScenarios>
</WorksheetOptions>
</Worksheet>
</Workbook>
**** HERE IS THE OUTPUT ****
<?xml version=""1.0""?>
"<?mso-application progid=""Excel.Sheet""?>"
"<Workbook xmlns=""urn:schemas-microsoft-com:office:spreadsheet""
xmlns:o=""urn:schemas-microsoft-com:office:office""
xmlns:x=""urn:schemas-microsoft-com:office:excel""
xmlns:ss=""urn:schemas-microsoft-com:office:spreadsheet""
xmlns:html=""http://www.w3.org/TR/REC-html40"">"
"<DocumentProperties xmlns=""urn:schemas-microsoft-com:office:office"">"
<Author>Chris Elrod</Author>
<LastAuthor>Chris Elrod</LastAuthor>
<Created>2022-02-04T18:06:39Z</Created>
<LastSaved>2022-02-04T18:07:59Z</LastSaved>
<Version>16.00</Version>
</DocumentProperties>
"<OfficeDocumentSettings xmlns=""urn:schemas-microsoft-com:office:office"">
<AllowPNG/>"
</OfficeDocumentSettings>
"<ExcelWorkbook xmlns=""urn:schemas-microsoft-com:office:excel"">"
<WindowHeight>11145</WindowHeight>
<WindowWidth>21600</WindowWidth>
<WindowTopX>2295</WindowTopX>
<WindowTopY>2295</WindowTopY>
<ProtectStructure>False</ProtectStructure>
<ProtectWindows>False</ProtectWindows>
</ExcelWorkbook>
<Styles>
"<Style ss:ID=""Default"" ss:Name=""Normal"">"
"<Alignment ss:Vertical=""Bottom""/>"
<Borders/>
"<Font ss:FontName=""Calibri"" x:Family=""Swiss"" ss:Size=""11"""
"ss:Color=""#000000""/>"
<Interior/>
<NumberFormat/>
<Protection/>
</Style>
"<Style ss:ID=""s16"">"
"<NumberFormat ss:Format=""Short Date""/>"
</Style>
</Styles>
"<Worksheet ss:Name=""Sheet1"">"
"<Table ss:ExpandedColumnCount=""3"" ss:ExpandedRowCount=""4"""
"x:FullColumns=""1"""
"x:FullRows=""1"" ss:DefaultColumnWidth=""51"" ss:DefaultRowHeight=""14.25"">
<Column ss:Index=""2"" ss:Width=""54.75""/>"
<Row>
"<Cell><Data ss:Type=""String"">Name</Data></Cell>"
"<Cell><Data ss:Type=""String"">Date</Data></Cell>"
"<Cell><Data ss:Type=""String"">Location</Data></Cell>"
</Row>
<Row>
"<Cell><Data ss:Type=""String"">Joe Smith</Data></Cell>"
"<Cell ss:StyleID=""s16""><Data ss:Type=""DateTime"">2022-02-
03T00:00:00.000</Data></Cell>"
"<Cell><Data ss:Type=""String"">Greenville</Data></Cell>"
</Row>
<Row>
"<Cell><Data ss:Type=""String"">John Doe</Data></Cell>"
"<Cell ss:StyleID=""s16""><Data ss:Type=""DateTime"">2021-03-
01T00:00:00.000</Data></Cell>"
"<Cell><Data ss:Type=""String"">Phoenix</Data></Cell>"
</Row>
<Row>
"<Cell><Data ss:Type=""String"">Jane Robinson</Data></Cell>"
"<Cell ss:StyleID=""s16""><Data ss:Type=""DateTime"">2020-04-
14T00:00:00.000</Data></Cell>"
"<Cell><Data ss:Type=""String"">Houston</Data></Cell>"
</Row>
</Table>
"<WorksheetOptions xmlns=""urn:schemas-microsoft-com:office:excel"">
<PageSetup>"
"<Header x:Margin=""0.3""/>"
"<Footer x:Margin=""0.3""/>"
"<PageMargins x:Bottom=""0.75"" x:Left=""0.7"" x:Right=""0.7"" x:Top=""0.75""/>"
</PageSetup>
<Selected/>
<Panes>
<Pane>
<Number>3</Number>
<ActiveRow>3</ActiveRow>
<ActiveCol>2</ActiveCol>
</Pane>
</Panes>
<ProtectObjects>False</ProtectObjects>
<ProtectScenarios>False</ProtectScenarios>
</WorksheetOptions>
</Worksheet>
</Workbook>
We successfully reproduced this issue at our end. The resultant CSV is not well formatted. Therefore, we’ve logged it in our internal issue tracking system with ID CONVERSIONNET-5093. It’ll be now further investigated. You’ll be notified in case of any progress update.
The issues you have found earlier (filed as CONVERSIONNET-5093) have been fixed in this update. This message was posted using Bugs notification tool by nikola.yankov