let
fn = (openXmlFileBinary as binary) as table =>
let
// unzip function source: https://github.com/ibarrau/PowerBi-code/blob/master/PowerQuery/ExtractZIP.pq
UnzipFile = (ZIPFile as binary) =>
let
Header = BinaryFormat.Record([ Signature = BinaryFormat.ByteOrder(BinaryFormat.UnsignedInteger32,ByteOrder.LittleEndian),
Version = BinaryFormat.ByteOrder(BinaryFormat.UnsignedInteger16,ByteOrder.LittleEndian),
Flags = BinaryFormat.ByteOrder(BinaryFormat.UnsignedInteger16,ByteOrder.LittleEndian),
Compression = BinaryFormat.ByteOrder(BinaryFormat.UnsignedInteger16,ByteOrder.LittleEndian),
ModTime = BinaryFormat.ByteOrder(BinaryFormat.UnsignedInteger16,ByteOrder.LittleEndian),
ModDate = BinaryFormat.ByteOrder(BinaryFormat.UnsignedInteger16,ByteOrder.LittleEndian),
CRC32 = BinaryFormat.ByteOrder(BinaryFormat.UnsignedInteger32,ByteOrder.LittleEndian),
CompressedSize = BinaryFormat.ByteOrder(BinaryFormat.UnsignedInteger32,ByteOrder.LittleEndian),
UncompressedSize = BinaryFormat.ByteOrder(BinaryFormat.UnsignedInteger32,ByteOrder.LittleEndian),
FileNameLen = BinaryFormat.ByteOrder(BinaryFormat.UnsignedInteger16,ByteOrder.LittleEndian),
ExtraFieldLen = BinaryFormat.ByteOrder(BinaryFormat.UnsignedInteger16,ByteOrder.LittleEndian)
]),
FileEntry = BinaryFormat.Choice(Header, each if _[Signature] <> 0x4034B50 then BinaryFormat.Null else
BinaryFormat.Record([
Header = _,
FileName = BinaryFormat.Text(_[FileNameLen]),
ExtraField = BinaryFormat.Text(_[ExtraFieldLen]),
UncompressedData = BinaryFormat.Transform(BinaryFormat.Binary(_[CompressedSize]),(x) => try Binary.Buffer(Binary.Decompress(x, Compression.Deflate)) otherwise null)
]), type binary),
ZipFormat = BinaryFormat.List(FileEntry, each _<> null),
Entries = List.Transform(
List.RemoveLastN( ZipFormat(ZIPFile), 1),
(e) => [FileName = e[FileName], Content = e[UncompressedData] ]
)
in
Table.FromRecords(Entries),
UnzipedFileTable = try UnzipFile(openXmlFileBinary) otherwise error "Source file is not an OpenXml file.",
FilterProperties = let tblProps = Table.SelectRows(UnzipedFileTable, each Text.StartsWith([FileName], "docProps/") and Text.EndsWith([FileName], ".xml")) in if Table.RowCount(tblProps)=0 then error "Source file is not an OpenXml file." else tblProps,
TransformXmlBinary = Table.TransformColumns(FilterProperties,{{"Content", each Xml.Document(_), type table}}),
RemoveXmlErrors = Table.RemoveRowsWithErrors(TransformXmlBinary, {"Content"}),
RecoverXmlData = Table.ExpandTableColumn(RemoveXmlErrors, "Content", {"Value"}, {"Value"}),
RecoverProperties = Table.ExpandTableColumn(RecoverXmlData, "Value", {"Name", "Value"}, {"Name", "Value"}),
RemoveEmptyProperties = Table.SelectRows(RecoverProperties, each (try Table.RowCount([Value]) otherwise 1)>0),
AddColumnKind = Table.AddColumn(RemoveEmptyProperties, "Kind", each Text.BetweenDelimiters([FileName], "/", "."), type text),
SelectColumns = Table.SelectColumns(AddColumnKind,{"Kind", "Name", "Value"})
in
SelectColumns,
DocumentationMetadata =
[
Documentation.Name = "GetOpenXmlFileProps",
Documentation.Description = "This function extract properties of an OpenXml file (docx, xlsm, ppts, ...).",
Documentation.Examples =
{
[
Description = "Extract properties of a .docx file:",
Code = "GetOpenXmlFileProps(File.Contents(""c:\folder\document.docx""))",
Result = "Table with properties kind, name and values."
],
[
Description = "Extract 'last modified by' property of a .xlsm file:",
Code = "GetOpenXmlFileProps(File.Contents(""c:\folder\workbook.xlsm"")){[Kind=""core"", Name=""lastModifiedBy""]}[Value]",
Result = "Text with 'last modified by' property."
]
}
]
in
Value.ReplaceType(fn, Value.ReplaceMetadata(Value.Type(fn), DocumentationMetadata))