Excel 2003 XML Format

I cannot get Excel 2003 XML format to convert to CSV. Is this format supported?

@Chris1962

Could you please share the problematic file and the sample code you are using for the conversion? We’ll then investigate this scenario at our end.

See below. I have the Code, Input File, and the Output File. The file is viewable in Excel.

**** HERE IS THE CODE ****

using GroupDocs.Conversion.FileTypes;
using GroupDocs.Conversion.Options.Convert;
using Microsoft.AspNetCore.Http;
using Microsoft.AspNetCore.Mvc;
using Microsoft.Azure.WebJobs;
using Microsoft.Azure.WebJobs.Extensions.Http;
using Microsoft.Extensions.Logging;
using System;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

namespace ExcelFunctions
{
    public static class ExcelToDelimited
    {

        public static Stream GetSourceFile(HttpRequest req)
        {
            MemoryStream memoryStream = new MemoryStream();
            req.Body.CopyTo(memoryStream);
            memoryStream.Position = 0;
            return memoryStream;
        }

        [FunctionName("ExcelToDelimited")]
        public static async Task<IActionResult> Run(
            [HttpTrigger(AuthorizationLevel.Function, "post", Route = null)] HttpRequest req,
            ILogger log)
        {
            log.LogInformation("C# HTTP trigger function processed a request.");

            // Get the file name and extension from the header.
            string headerData = new(req.Headers["FileName"].ToString());
            string fileName = string.Empty;
            string fileExtension = string.Empty;

            // Get the delimiter from the header.
            string delimiter = req.Headers["Delimiter"].ToString();
            if (string.IsNullOrEmpty(delimiter))
            {
                delimiter = ",";
            }

            try
            {
                // Disect the file name and extension.
                if (headerData.LastIndexOf('.') != -1)
                {
                    fileName = headerData[..headerData.LastIndexOf('.')];
                    fileExtension = headerData[(headerData.LastIndexOf('.') + 1)..].ToLower();
                }

                // Initially convert to an xlsx file.
                MemoryStream sourceFile = new();
                using var converter = new GroupDocs.Conversion.Converter(() => GetSourceFile(req));
                SpreadsheetConvertOptions options = new();
                options.Format = SpreadsheetFileType.Tsv;
                converter.Convert(() => sourceFile, options);
                sourceFile.Close();

                // Reopen the converted xlsx file.
                byte[] buffer = sourceFile.ToArray();
                MemoryStream delimitedFile = new(buffer);

                using (StreamReader reader = new StreamReader(delimitedFile))
                {
                    string content = reader.ReadToEnd();
                    reader.Close();
                    content = Regex.Replace(content, "\t", delimiter);
                    buffer = Encoding.ASCII.GetBytes(content);
                }

                // Return the delimited file.
                FileContentResult resultFile = new(buffer, "text/csv");
                resultFile.FileDownloadName = fileName + ".csv";
                return resultFile;
            }
            catch (Exception ex)
            {
                return new BadRequestObjectResult(ex);
            }
        }
    }
}

**** HERE IS THE INPUT ****

<?xml version="1.0"?>
<?mso-application progid="Excel.Sheet"?>
<Workbook xmlns="urn:schemas-microsoft-com:office:spreadsheet"
 xmlns:o="urn:schemas-microsoft-com:office:office"
 xmlns:x="urn:schemas-microsoft-com:office:excel"
 xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet"
 xmlns:html="http://www.w3.org/TR/REC-html40">
 <DocumentProperties xmlns="urn:schemas-microsoft-com:office:office">
  <Author>Chris Elrod</Author>
  <LastAuthor>Chris Elrod</LastAuthor>
  <Created>2022-02-04T18:06:39Z</Created>
  <LastSaved>2022-02-04T18:07:59Z</LastSaved>
  <Version>16.00</Version>
 </DocumentProperties>
 <OfficeDocumentSettings xmlns="urn:schemas-microsoft-com:office:office">
  <AllowPNG/>
 </OfficeDocumentSettings>
 <ExcelWorkbook xmlns="urn:schemas-microsoft-com:office:excel">
  <WindowHeight>11145</WindowHeight>
  <WindowWidth>21600</WindowWidth>
  <WindowTopX>2295</WindowTopX>
  <WindowTopY>2295</WindowTopY>
  <ProtectStructure>False</ProtectStructure>
  <ProtectWindows>False</ProtectWindows>
 </ExcelWorkbook>
 <Styles>
  <Style ss:ID="Default" ss:Name="Normal">
   <Alignment ss:Vertical="Bottom"/>
   <Borders/>
   <Font ss:FontName="Calibri" x:Family="Swiss" ss:Size="11" ss:Color="#000000"/>
   <Interior/>
   <NumberFormat/>
   <Protection/>
  </Style>
  <Style ss:ID="s16">
   <NumberFormat ss:Format="Short Date"/>
  </Style>
 </Styles>
 <Worksheet ss:Name="Sheet1">
  <Table ss:ExpandedColumnCount="3" ss:ExpandedRowCount="4" x:FullColumns="1"
   x:FullRows="1" ss:DefaultColumnWidth="51" ss:DefaultRowHeight="14.25">
   <Column ss:Index="2" ss:Width="54.75"/>
   <Row>
    <Cell><Data ss:Type="String">Name</Data></Cell>
    <Cell><Data ss:Type="String">Date</Data></Cell>
    <Cell><Data ss:Type="String">Location</Data></Cell>
   </Row>
   <Row>
    <Cell><Data ss:Type="String">Joe Smith</Data></Cell>
    <Cell ss:StyleID="s16"><Data ss:Type="DateTime">2022-02-03T00:00:00.000</Data></Cell>
    <Cell><Data ss:Type="String">Greenville</Data></Cell>
   </Row>
   <Row>
    <Cell><Data ss:Type="String">John Doe</Data></Cell>
    <Cell ss:StyleID="s16"><Data ss:Type="DateTime">2021-03-01T00:00:00.000</Data></Cell>
    <Cell><Data ss:Type="String">Phoenix</Data></Cell>
   </Row>
   <Row>
    <Cell><Data ss:Type="String">Jane Robinson</Data></Cell>
    <Cell ss:StyleID="s16"><Data ss:Type="DateTime">2020-04-14T00:00:00.000</Data></Cell>
    <Cell><Data ss:Type="String">Houston</Data></Cell>
   </Row>
  </Table>
  <WorksheetOptions xmlns="urn:schemas-microsoft-com:office:excel">
   <PageSetup>
    <Header x:Margin="0.3"/>
    <Footer x:Margin="0.3"/>
    <PageMargins x:Bottom="0.75" x:Left="0.7" x:Right="0.7" x:Top="0.75"/>
   </PageSetup>
   <Selected/>
   <Panes>
    <Pane>
     <Number>3</Number>
     <ActiveRow>3</ActiveRow>
     <ActiveCol>2</ActiveCol>
    </Pane>
   </Panes>
   <ProtectObjects>False</ProtectObjects>
   <ProtectScenarios>False</ProtectScenarios>
  </WorksheetOptions>
 </Worksheet>
</Workbook>

**** HERE IS THE OUTPUT ****

<?xml version=""1.0""?>
"<?mso-application progid=""Excel.Sheet""?>"
"<Workbook xmlns=""urn:schemas-microsoft-com:office:spreadsheet""
xmlns:o=""urn:schemas-microsoft-com:office:office""
xmlns:x=""urn:schemas-microsoft-com:office:excel""
xmlns:ss=""urn:schemas-microsoft-com:office:spreadsheet""
xmlns:html=""http://www.w3.org/TR/REC-html40"">"
"<DocumentProperties xmlns=""urn:schemas-microsoft-com:office:office"">"
<Author>Chris Elrod</Author>
<LastAuthor>Chris Elrod</LastAuthor>
<Created>2022-02-04T18:06:39Z</Created>
<LastSaved>2022-02-04T18:07:59Z</LastSaved>
<Version>16.00</Version>
</DocumentProperties>
"<OfficeDocumentSettings xmlns=""urn:schemas-microsoft-com:office:office"">
<AllowPNG/>"
</OfficeDocumentSettings>
"<ExcelWorkbook xmlns=""urn:schemas-microsoft-com:office:excel"">"
<WindowHeight>11145</WindowHeight>
<WindowWidth>21600</WindowWidth>
<WindowTopX>2295</WindowTopX>
<WindowTopY>2295</WindowTopY>
<ProtectStructure>False</ProtectStructure>
<ProtectWindows>False</ProtectWindows>
</ExcelWorkbook>
<Styles>
"<Style ss:ID=""Default"" ss:Name=""Normal"">"
"<Alignment ss:Vertical=""Bottom""/>"
<Borders/>
"<Font ss:FontName=""Calibri"" x:Family=""Swiss"" ss:Size=""11"""
"ss:Color=""#000000""/>"
<Interior/>
<NumberFormat/>
<Protection/>
</Style>
"<Style ss:ID=""s16"">"
"<NumberFormat ss:Format=""Short Date""/>"
</Style>
</Styles>
"<Worksheet ss:Name=""Sheet1"">"
"<Table ss:ExpandedColumnCount=""3"" ss:ExpandedRowCount=""4"""
"x:FullColumns=""1"""
"x:FullRows=""1"" ss:DefaultColumnWidth=""51"" ss:DefaultRowHeight=""14.25"">
<Column ss:Index=""2"" ss:Width=""54.75""/>"
<Row>
"<Cell><Data ss:Type=""String"">Name</Data></Cell>"
"<Cell><Data ss:Type=""String"">Date</Data></Cell>"
"<Cell><Data ss:Type=""String"">Location</Data></Cell>"
</Row>
<Row>
"<Cell><Data ss:Type=""String"">Joe Smith</Data></Cell>"
"<Cell ss:StyleID=""s16""><Data ss:Type=""DateTime"">2022-02-
03T00:00:00.000</Data></Cell>"
"<Cell><Data ss:Type=""String"">Greenville</Data></Cell>"
</Row>
<Row>
"<Cell><Data ss:Type=""String"">John Doe</Data></Cell>"
"<Cell ss:StyleID=""s16""><Data ss:Type=""DateTime"">2021-03-
01T00:00:00.000</Data></Cell>"
"<Cell><Data ss:Type=""String"">Phoenix</Data></Cell>"
</Row>
<Row>
"<Cell><Data ss:Type=""String"">Jane Robinson</Data></Cell>"
"<Cell ss:StyleID=""s16""><Data ss:Type=""DateTime"">2020-04-
14T00:00:00.000</Data></Cell>"
"<Cell><Data ss:Type=""String"">Houston</Data></Cell>"
</Row>
</Table>
"<WorksheetOptions xmlns=""urn:schemas-microsoft-com:office:excel"">
<PageSetup>"
"<Header x:Margin=""0.3""/>"
"<Footer x:Margin=""0.3""/>"
"<PageMargins x:Bottom=""0.75"" x:Left=""0.7"" x:Right=""0.7"" x:Top=""0.75""/>"
</PageSetup>
<Selected/>
<Panes>
<Pane>
<Number>3</Number>
<ActiveRow>3</ActiveRow>
<ActiveCol>2</ActiveCol>
</Pane>
</Panes>
<ProtectObjects>False</ProtectObjects>
<ProtectScenarios>False</ProtectScenarios>
</WorksheetOptions>
</Worksheet>
</Workbook>
1 Like

@Chris1962

We successfully reproduced this issue at our end. The resultant CSV is not well formatted. Therefore, we’ve logged it in our internal issue tracking system with ID CONVERSIONNET-5093. It’ll be now further investigated. You’ll be notified in case of any progress update.