let
fn = (openXmlFileBinary as binary) as table =>
    // Reads the "docProps/core.xml" part of an OpenXml package (a ZIP archive) and
    // returns the child elements of its root element as a table with the columns
    // "Name" and "Value".
    // Raises "Source file is not an OpenXml file." when the binary cannot be unzipped
    // or when it contains no "docProps/core.xml" entry.
    let
        // unzip function source: https://github.com/ibarrau/PowerBi-code/blob/master/PowerQuery/ExtractZIP.pq
        // Walks the ZIP local file entries from the start of the binary and returns a
        // table with one row per entry: FileName (text) and Content (binary, or null
        // when the entry could not be decompressed).
        // NOTE(review): sizes are taken from the local file header. Per the ZIP
        // specification, archives written with a data descriptor (general purpose flag
        // bit 3) store a zero CompressedSize there, so such archives are not read
        // correctly by this parser.
        UnzipFile = (ZIPFile as binary) =>
            let
                // ZIP stores all multi-byte integers in little-endian order.
                UInt16 = BinaryFormat.ByteOrder(BinaryFormat.UnsignedInteger16, ByteOrder.LittleEndian),
                UInt32 = BinaryFormat.ByteOrder(BinaryFormat.UnsignedInteger32, ByteOrder.LittleEndian),
                // Fixed 30-byte part of a local file header.
                Header = BinaryFormat.Record([
                    Signature = UInt32,
                    Version = UInt16,
                    Flags = UInt16,
                    Compression = UInt16,
                    ModTime = UInt16,
                    ModDate = UInt16,
                    CRC32 = UInt32,
                    CompressedSize = UInt32,
                    UncompressedSize = UInt32,
                    FileNameLen = UInt16,
                    ExtraFieldLen = UInt16
                ]),
                // Converts the raw bytes of an entry into its content.
                // Method 0 means "stored": the bytes already are the content and must
                // not be inflated. Every other method goes through Deflate, yielding
                // null when inflation fails (e.g. an unsupported method).
                Expand = (method, raw) =>
                    if method = 0 then
                        Binary.Buffer(raw)
                    else
                        try Binary.Buffer(Binary.Decompress(raw, Compression.Deflate)) otherwise null,
                // One entry: the header followed by name, extra field and data.
                // Anything not starting with the local file header signature
                // (0x04034B50) is read as null, which ends the entry list.
                FileEntry = BinaryFormat.Choice(
                    Header,
                    (hdr) =>
                        if hdr[Signature] <> 0x4034B50 then
                            BinaryFormat.Null
                        else
                            BinaryFormat.Record([
                                Header = hdr,
                                FileName = BinaryFormat.Text(hdr[FileNameLen]),
                                ExtraField = BinaryFormat.Text(hdr[ExtraFieldLen]),
                                UncompressedData = BinaryFormat.Transform(
                                    BinaryFormat.Binary(hdr[CompressedSize]),
                                    (raw) => Expand(hdr[Compression], raw)
                                )
                            ]),
                    type binary
                ),
                ZipFormat = BinaryFormat.List(FileEntry, each _ <> null),
                // The item that stopped the list (the null) is part of the result; drop it.
                Entries = List.Transform(
                    List.RemoveLastN(ZipFormat(ZIPFile), 1),
                    (e) => [FileName = e[FileName], Content = e[UncompressedData]]
                )
            in
                Table.FromRecords(Entries),
        UnzipedFileTable = try UnzipFile(openXmlFileBinary) otherwise error "Source file is not an OpenXml file.",
        // First entry named exactly "docProps/core.xml"; a missing entry is reported
        // with the same message as an unreadable archive.
        docPropsCoreBinary = try Table.SelectRows(UnzipedFileTable, each ([FileName] = "docProps/core.xml")){0}[Content] otherwise error "Source file is not an OpenXml file.",
        ContenuXml = Xml.Document(docPropsCoreBinary),
        // Row 0 is the document's root element; its Value holds the child elements.
        Value = ContenuXml{0}[Value],
        SelectedColumns = Table.SelectColumns(Value, {"Name", "Value"})
    in
        SelectedColumns,
DocumentationMetadata =
    // Documentation record attached to the function's type: display name,
    // description and one usage example.
    [
        Documentation.Name = "GetOpenXmlFileCoreProps",
        Documentation.Description = "This function extract 'core properties' of an OpenXml file (docx, xlsm, pptx, ...).",
        Documentation.Examples =
        {
            [
                Description = "Extract core properties of a .docx file:",
                // The example must be a complete expression: both opening parentheses are closed.
                Code = "GetOpenXmlFileCoreProps(File.Contents(""c:\folder\document.docx""))",
                Result = "Table with name and values of core properties."
            ]
        }
    ]
in
// Return fn re-typed with its own function type carrying DocumentationMetadata,
// so the name, description and example above travel with the function value.
Value.ReplaceType(fn, Value.ReplaceMetadata(Value.Type(fn), DocumentationMetadata))