diff --git a/premake5.lua b/premake5.lua index 30d025979..b8b1a07b5 100644 --- a/premake5.lua +++ b/premake5.lua @@ -228,6 +228,7 @@ solution("xenia") include("third_party/glslang-spirv.lua") include("third_party/imgui.lua") include("third_party/libav.lua") + include("third_party/mspack.lua") include("third_party/snappy.lua") include("third_party/spirv-tools.lua") include("third_party/volk.lua") diff --git a/src/xenia/cpu/xex_module.cc b/src/xenia/cpu/xex_module.cc index 8569e7a9a..8b6b5e173 100644 --- a/src/xenia/cpu/xex_module.cc +++ b/src/xenia/cpu/xex_module.cc @@ -25,7 +25,6 @@ #include "third_party/crypto/rijndael-alg-fst.c" #include "third_party/crypto/rijndael-alg-fst.h" #include "third_party/mspack/lzx.h" -#include "third_party/mspack/lzxd.c" #include "third_party/mspack/mspack.h" #include "third_party/pe/pe_image.h" @@ -120,7 +119,7 @@ int lzx_decompress(const void* lzx_data, size_t lzx_len, void* dest, mspack_memory_file* lzxdst = mspack_memory_open(sys, dest, dest_len); lzxd_stream* lzxd = lzxd_init(sys, (struct mspack_file*)lzxsrc, (struct mspack_file*)lzxdst, - window_bits, 0, 0x8000, (off_t)dest_len); + window_bits, 0, 0x8000, (off_t)dest_len, 0); if (lzxd) { if (window_data) { diff --git a/third_party/mspack.lua b/third_party/mspack.lua new file mode 100644 index 000000000..85b6bc08f --- /dev/null +++ b/third_party/mspack.lua @@ -0,0 +1,33 @@ +group("third_party") +project("mspack") + uuid("0881692A-75A1-4E7B-87D8-BB9108CEDEA4") + kind("StaticLib") + language("C") + + defines({ + "_LIB", + "HAVE_CONFIG_H", + }) + removedefines({ + "_UNICODE", + "UNICODE", + }) + includedirs({ + "mspack", + }) + files({ + "mspack/lzx.h", + "mspack/lzxd.c", + "mspack/mspack.h", + "mspack/readbits.h", + "mspack/readhuff.h", + "mspack/system.c", + "mspack/system.h", + }) + + filter("platforms:Windows") + defines({ + }) + filter("platforms:Linux") + defines({ + }) diff --git a/third_party/mspack/COPYING.LIB b/third_party/mspack/COPYING.LIB new file mode 100644 index 
000000000..b1e3f5a26 --- /dev/null +++ b/third_party/mspack/COPYING.LIB @@ -0,0 +1,504 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. 
+ + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. 
+ + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. 
+ + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. 
The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. 
+ + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. 
+ + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. 
+Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. + + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. 
Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. (It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. 
However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. 
However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. 
+ +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. 
If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. 
+ + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms of the +ordinary General Public License). + + To apply these terms, attach the following notices to the library. It is +safest to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + <one line to give the library's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James Random Hacker. + + <signature of Ty Coon>, 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it!
+ + diff --git a/third_party/mspack/config.h b/third_party/mspack/config.h new file mode 100644 index 000000000..c4d21f9f7 --- /dev/null +++ b/third_party/mspack/config.h @@ -0,0 +1,114 @@ +/* config.h.in. Generated from configure.ac by autoheader. */ + +/* Turn debugging mode on? */ +#undef DEBUG + +/* Define to 1 if you have the <dlfcn.h> header file. */ +#undef HAVE_DLFCN_H + +/* Define to 1 if fseeko (and presumably ftello) exists and is declared. */ +#undef HAVE_FSEEKO + +/* Define to 1 if you have the <inttypes.h> header file. */ +#define HAVE_INTTYPES_H 1 + +/* Define to 1 if you have the <memory.h> header file. */ +#undef HAVE_MEMORY_H + +/* Define to 1 if you have the `mkdir' function. */ +#undef HAVE_MKDIR + +/* Define to 1 if you have the <stdint.h> header file. */ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the <stdlib.h> header file. */ +#define HAVE_STDLIB_H 1 + +/* Define to 1 if you have the <strings.h> header file. */ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the <string.h> header file. */ +#define HAVE_STRING_H 1 + +/* Define to 1 if you have the <sys/stat.h> header file. */ +#undef HAVE_SYS_STAT_H + +/* Define to 1 if you have the <sys/types.h> header file. */ +#undef HAVE_SYS_TYPES_H + +/* Define to 1 if you have the `towlower' function. */ +#undef HAVE_TOWLOWER + +/* Define to 1 if you have the <unistd.h> header file. */ +#undef HAVE_UNISTD_H + +/* Define to 1 if you have the `_mkdir' function. */ +#undef HAVE__MKDIR + +/* Define to the sub-directory where libtool stores uninstalled libraries. */ +#undef LT_OBJDIR + +/* Define if mkdir takes only one argument. */ +#undef MKDIR_TAKES_ONE_ARG + +/* Name of package */ +#undef PACKAGE + +/* Define to the address where bug reports for this package should be sent. */ +#undef PACKAGE_BUGREPORT + +/* Define to the full name of this package. */ +#undef PACKAGE_NAME + +/* Define to the full name and version of this package. */ +#undef PACKAGE_STRING + +/* Define to the one symbol short name of this package. */ +#undef PACKAGE_TARNAME + +/* Define to the home page for this package.
*/ +#undef PACKAGE_URL + +/* Define to the version of this package. */ +#undef PACKAGE_VERSION + +/* The size of `off_t', as computed by sizeof. */ +#undef SIZEOF_OFF_T + +/* Define to 1 if you have the ANSI C header files. */ +#undef STDC_HEADERS + +/* Version number of package */ +#undef VERSION + +/* Enable large inode numbers on Mac OS X 10.5. */ +#ifndef _DARWIN_USE_64_BIT_INODE +# define _DARWIN_USE_64_BIT_INODE 1 +#endif + +/* Number of bits in a file offset, on hosts where this is settable. */ +#undef _FILE_OFFSET_BITS + +/* Define to 1 to make fseeko visible on some hosts (e.g. glibc 2.2). */ +#undef _LARGEFILE_SOURCE + +/* Define for large files, on AIX-style hosts. */ +#undef _LARGE_FILES + +/* Define to empty if `const' does not conform to ANSI C. */ +#undef const + +/* Define to `__inline__' or `__inline' if that's what the C compiler + calls it, or to nothing if 'inline' is not supported under any name. */ +#ifndef __cplusplus +#undef inline +#endif + +/* Define to `int' if <sys/types.h> does not define. */ +#undef mode_t + +/* Define to `long int' if <sys/types.h> does not define. */ +#undef off_t + +/* Define to `unsigned int' if <sys/types.h> does not define. */ +#undef size_t diff --git a/third_party/mspack/lzx.h b/third_party/mspack/lzx.h index e9eda0fbb..a6152f622 100644 --- a/third_party/mspack/lzx.h +++ b/third_party/mspack/lzx.h @@ -1,5 +1,5 @@ /* This file is part of libmspack. - * (C) 2003-2004 Stuart Caie. + * (C) 2003-2013 Stuart Caie. * * The LZX method was created by Jonathan Forbes and Tomi Poutanen, adapted * by Microsoft Corporation.
@@ -13,6 +13,10 @@ #ifndef MSPACK_LZX_H #define MSPACK_LZX_H 1 +#ifdef __cplusplus +extern "C" { +#endif + /* LZX compression / decompression definitions */ /* some constants defined by the LZX specification */ @@ -31,7 +35,7 @@ /* LZX huffman defines: tweak tablebits as desired */ #define LZX_PRETREE_MAXSYMBOLS (LZX_PRETREE_NUM_ELEMENTS) #define LZX_PRETREE_TABLEBITS (6) -#define LZX_MAINTREE_MAXSYMBOLS (LZX_NUM_CHARS + 50*8) +#define LZX_MAINTREE_MAXSYMBOLS (LZX_NUM_CHARS + 290*8) #define LZX_MAINTREE_TABLEBITS (12) #define LZX_LENGTH_MAXSYMBOLS (LZX_NUM_SECONDARY_LENGTHS+1) #define LZX_LENGTH_TABLEBITS (12) @@ -51,6 +55,8 @@ struct lzxd_stream { unsigned char *window; /* decoding window */ unsigned int window_size; /* window size */ + unsigned int ref_data_size; /* LZX DELTA reference data size */ + unsigned int num_offsets; /* number of match_offset entries in table */ unsigned int window_posn; /* decompression offset within window */ unsigned int frame_posn; /* current frame offset within in window */ unsigned int frame; /* the number of 32kb frames processed */ @@ -66,8 +72,8 @@ struct lzxd_stream { unsigned char intel_started; /* has intel E8 decoding started? */ unsigned char block_type; /* type of the current block */ unsigned char header_read; /* have we started decoding at all yet? */ - unsigned char posn_slots; /* how many posn slots in stream? */ unsigned char input_end; /* have we reached the end of input? */ + unsigned char is_delta; /* does stream follow LZX DELTA spec? 
*/ int error; @@ -83,85 +89,133 @@ struct lzxd_stream { /* huffman decoding tables */ unsigned short PRETREE_table [(1 << LZX_PRETREE_TABLEBITS) + - (LZX_PRETREE_MAXSYMBOLS * 2)]; + (LZX_PRETREE_MAXSYMBOLS * 2)]; unsigned short MAINTREE_table[(1 << LZX_MAINTREE_TABLEBITS) + - (LZX_MAINTREE_MAXSYMBOLS * 2)]; + (LZX_MAINTREE_MAXSYMBOLS * 2)]; unsigned short LENGTH_table [(1 << LZX_LENGTH_TABLEBITS) + - (LZX_LENGTH_MAXSYMBOLS * 2)]; + (LZX_LENGTH_MAXSYMBOLS * 2)]; unsigned short ALIGNED_table [(1 << LZX_ALIGNED_TABLEBITS) + - (LZX_ALIGNED_MAXSYMBOLS * 2)]; + (LZX_ALIGNED_MAXSYMBOLS * 2)]; + unsigned char LENGTH_empty; /* this is used purely for doing the intel E8 transform */ unsigned char e8_buf[LZX_FRAME_SIZE]; }; -/* allocates LZX decompression state for decoding the given stream. +/** + * Allocates and initialises LZX decompression state for decoding an LZX + * stream. * - * - returns NULL if window_bits is outwith the range 15 to 21 (inclusive). + * This routine uses system->alloc() to allocate memory. If memory + * allocation fails, or the parameters to this function are invalid, + * NULL is returned. * - * - uses system->alloc() to allocate memory - * - * - returns NULL if not enough memory - * - * - window_bits is the size of the LZX window, from 32Kb (15) to 2Mb (21). - * - * - reset_interval is how often the bitstream is reset, measured in - * multiples of 32Kb bytes output. For CAB LZX streams, this is always 0 - * (does not occur). - * - * - input_buffer_size is how many bytes to use as an input bitstream buffer - * - * - output_length is the length in bytes of the entirely decompressed - * output stream, if known in advance. It is used to correctly perform - * the Intel E8 transformation, which must stop 6 bytes before the very - * end of the decompressed stream. It is not otherwise used or adhered - * to. If the full decompressed length is known in advance, set it here. 
- * If it is NOT known, use the value 0, and call lzxd_set_output_length() - * once it is known. If never set, 4 of the final 6 bytes of the output - * stream may be incorrect. + * @param system an mspack_system structure used to read from + * the input stream and write to the output + * stream, also to allocate and free memory. + * @param input an input stream with the LZX data. + * @param output an output stream to write the decoded data to. + * @param window_bits the size of the decoding window, which must be + * between 15 and 21 inclusive for regular LZX + * data, or between 17 and 25 inclusive for + * LZX DELTA data. + * @param reset_interval the interval at which the LZX bitstream is + * reset, in multiples of LZX frames (32768 + * bytes), e.g. a value of 2 indicates the input + * stream resets after every 65536 output bytes. + * A value of 0 indicates that the bitstream never + * resets, such as in CAB LZX streams. + * @param input_buffer_size the number of bytes to use as an input + * bitstream buffer. + * @param output_length the length in bytes of the entirely + * decompressed output stream, if known in + * advance. It is used to correctly perform the + * Intel E8 transformation, which must stop 6 + * bytes before the very end of the + * decompressed stream. It is not otherwise used + * or adhered to. If the full decompressed + * length is known in advance, set it here. + * If it is NOT known, use the value 0, and call + * lzxd_set_output_length() once it is + * known. If never set, 4 of the final 6 bytes + * of the output stream may be incorrect. + * @param is_delta should be zero for all regular LZX data, + * non-zero for LZX DELTA encoded data. + * @return a pointer to an initialised lzxd_stream structure, or NULL if + * there was not enough memory or parameters to the function were wrong.
*/ extern struct lzxd_stream *lzxd_init(struct mspack_system *system, - struct mspack_file *input, - struct mspack_file *output, - int window_bits, - int reset_interval, - int input_buffer_size, - off_t output_length); + struct mspack_file *input, + struct mspack_file *output, + int window_bits, + int reset_interval, + int input_buffer_size, + off_t output_length, + char is_delta); /* see description of output_length in lzxd_init() */ extern void lzxd_set_output_length(struct lzxd_stream *lzx, - off_t output_length); + off_t output_length); -/* decompresses, or decompresses more of, an LZX stream. +/** + * Reads LZX DELTA reference data into the window and allows + * lzxd_decompress() to reference it. * - * - out_bytes of data will be decompressed and the function will return - * with an MSPACK_ERR_OK return code. + * Call this before the first call to lzxd_decompress(). + + * @param lzx the LZX stream to apply this reference data to + * @param system an mspack_system implementation to use with the + * input param. Only read() will be called. + * @param input an input file handle to read reference data using + * system->read(). + * @param length the length of the reference data. Cannot be longer + * than the LZX window size. + * @return an error code, or MSPACK_ERR_OK if successful + */ +extern int lzxd_set_reference_data(struct lzxd_stream *lzx, + struct mspack_system *system, + struct mspack_file *input, + unsigned int length); + +/** + * Decompresses entire or partial LZX streams. * - * - decompressing will stop as soon as out_bytes is reached. if the true - * amount of bytes decoded spills over that amount, they will be kept for - * a later invocation of lzxd_decompress(). + * The number of bytes of data that should be decompressed is given as the + * out_bytes parameter. If more bytes are decoded than are needed, they + * will be kept over for a later invocation. 
* - * - the output bytes will be passed to the system->write() function given in - * lzxd_init(), using the output file handle given in lzxd_init(). More - * than one call may be made to system->write(). + * The output bytes will be passed to the system->write() function given in + * lzxd_init(), using the output file handle given in lzxd_init(). More than + * one call may be made to system->write(). + + * Input bytes will be read in as necessary using the system->read() + * function given in lzxd_init(), using the input file handle given in + * lzxd_init(). This will continue until system->read() returns 0 bytes, + * or an error. Errors will be passed out of the function as + * MSPACK_ERR_READ errors. Input streams should convey an "end of input + * stream" by refusing to supply all the bytes that LZX asks for when they + * reach the end of the stream, rather than return an error code. * - * - LZX will read input bytes as necessary using the system->read() function - * given in lzxd_init(), using the input file handle given in lzxd_init(). - * This will continue until system->read() returns 0 bytes, or an error. - * input streams should convey an "end of input stream" by refusing to - * supply all the bytes that LZX asks for when they reach the end of the - * stream, rather than return an error code. + * If any error code other than MSPACK_ERR_OK is returned, the stream + * should be considered unusable and lzxd_decompress() should not be + * called again on this stream. * - * - if an error code other than MSPACK_ERR_OK is returned, the stream should - * be considered unusable and lzxd_decompress() should not be called again - * on this stream. + * @param lzx LZX decompression state, as allocated by lzxd_init(). + * @param out_bytes the number of bytes of data to decompress. 
+ * @return an error code, or MSPACK_ERR_OK if successful */ extern int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes); -/* frees all state associated with an LZX data stream +/** + * Frees all state associated with an LZX data stream. This will call + * system->free() using the system pointer given in lzxd_init(). * - * - calls system->free() using the system pointer given in lzxd_init() + * @param lzx LZX decompression state to free. */ void lzxd_free(struct lzxd_stream *lzx); +#ifdef __cplusplus +} +#endif + #endif diff --git a/third_party/mspack/lzxd.c b/third_party/mspack/lzxd.c index 2fdf23e80..6cc33df08 100644 --- a/third_party/mspack/lzxd.c +++ b/third_party/mspack/lzxd.c @@ -1,5 +1,5 @@ /* This file is part of libmspack. - * (C) 2003-2004 Stuart Caie. + * (C) 2003-2013 Stuart Caie. * * The LZX method was created by Jonathan Forbes and Tomi Poutanen, adapted * by Microsoft Corporation. @@ -12,11 +12,11 @@ /* LZX decompression implementation */ -#include "mspack.h" -#include "lzx.h" +#include +#include -/* Microsoft's LZX document and their implementation of the - * com.ms.util.cab Java package do not concur. +/* Microsoft's LZX document (in cab-sdk.exe) and their implementation + * of the com.ms.util.cab Java package do not concur. * * In the LZX document, there is a table showing the correlation between * window size and the number of position slots. It states that the 1MB @@ -58,240 +58,85 @@ * least one element. However, many CAB files contain blocks where the * length tree is completely empty (because there are no matches), and * this is expected to succeed. + * + * The errors in LZX documentation appear have been corrected in the + * new documentation for the LZX DELTA format. + * + * http://msdn.microsoft.com/en-us/library/cc483133.aspx + * + * However, this is a different format, an extension of regular LZX. + * I have noticed the following differences, there may be more: + * + * The maximum window size has increased from 2MB to 32MB. 
This also + * increases the maximum number of position slots, etc. + * + * If the match length is 257 (the maximum possible), this signals + * a further length decoding step, that allows for matches up to + * 33024 bytes long. + * + * The format now allows for "reference data", supplied by the caller. + * If match offsets go further back than the number of bytes + * decompressed so far, they are accessing the reference data. */ - -/* LZX decompressor input macros - * - * STORE_BITS stores bitstream state in lzxd_stream structure - * RESTORE_BITS restores bitstream state from lzxd_stream structure - * READ_BITS(var,n) takes N bits from the buffer and puts them in var - * ENSURE_BITS(n) ensures there are at least N bits in the bit buffer. - * PEEK_BITS(n) extracts without removing N bits from the bit buffer - * REMOVE_BITS(n) removes N bits from the bit buffer - * - * These bit access routines work by using the area beyond the MSB and the - * LSB as a free source of zeroes when shifting. This avoids having to - * mask any bits. So we have to know the bit width of the bit buffer - * variable. - * - * The bit buffer datatype should be at least 32 bits wide: it must be - * possible to ENSURE_BITS(16), so it must be possible to add 16 new bits - * to the bit buffer when the bit buffer already has 1 to 15 bits left.
- */ - -#include -#ifndef CHAR_BIT -# define CHAR_BIT (8) -#endif -#define BITBUF_WIDTH (sizeof(bit_buffer) * CHAR_BIT) - -#ifdef LZXDEBUG -# include -# define D(x) do { printf("%s:%d (%s) ",__FILE__, __LINE__, __FUNCTION__); \ - printf x ; fputc('\n', stdout); fflush(stdout);} while (0); -#else -# define D(x) -#endif - -#define STORE_BITS do { \ - lzx->i_ptr = i_ptr; \ - lzx->i_end = i_end; \ - lzx->bit_buffer = bit_buffer; \ - lzx->bits_left = bits_left; \ +/* import bit-reading macros and code */ +#define BITS_TYPE struct lzxd_stream +#define BITS_VAR lzx +#define BITS_ORDER_MSB +#define READ_BYTES do { \ + unsigned char b0, b1; \ + READ_IF_NEEDED; b0 = *i_ptr++; \ + READ_IF_NEEDED; b1 = *i_ptr++; \ + INJECT_BITS((b1 << 8) | b0, 16); \ } while (0) +#include -#define RESTORE_BITS do { \ - i_ptr = lzx->i_ptr; \ - i_end = lzx->i_end; \ - bit_buffer = lzx->bit_buffer; \ - bits_left = lzx->bits_left; \ -} while (0) - -#define ENSURE_BITS(nbits) \ - while (bits_left < (nbits)) { \ - if (i_ptr >= i_end) { \ - if (lzxd_read_input(lzx)) return lzx->error; \ - i_ptr = lzx->i_ptr; \ - i_end = lzx->i_end; \ - } \ - bit_buffer |= ((i_ptr[1] << 8) | i_ptr[0]) \ - << (BITBUF_WIDTH - 16 - bits_left); \ - bits_left += 16; \ - i_ptr += 2; \ - } - -#define PEEK_BITS(nbits) (bit_buffer >> (BITBUF_WIDTH - (nbits))) - -#define REMOVE_BITS(nbits) ((bit_buffer <<= (nbits)), (bits_left -= (nbits))) - -#define READ_BITS(val, nbits) do { \ - ENSURE_BITS(nbits); \ - (val) = PEEK_BITS(nbits); \ - REMOVE_BITS(nbits); \ -} while (0) - -static int lzxd_read_input(struct lzxd_stream *lzx) { - int read = lzx->sys->read(lzx->input, &lzx->inbuf[0], (int)lzx->inbuf_size); - if (read < 0) return lzx->error = MSPACK_ERR_READ; - - /* huff decode's ENSURE_BYTES(16) might overrun the input stream, even - * if those bits aren't used, so fake 2 more bytes */ - if (read == 0) { - if (lzx->input_end) { - D(("out of input bytes")) - return lzx->error = MSPACK_ERR_READ; - } - else { - read = 2; - 
lzx->inbuf[0] = lzx->inbuf[1] = 0; - lzx->input_end = 1; - } - } - - lzx->i_ptr = &lzx->inbuf[0]; - lzx->i_end = &lzx->inbuf[read]; - - return MSPACK_ERR_OK; -} - -/* Huffman decoding macros */ - -/* READ_HUFFSYM(tablename, var) decodes one huffman symbol from the - * bitstream using the stated table and puts it in var. - */ -#define READ_HUFFSYM(tbl, var) do { \ - /* huffman symbols can be up to 16 bits long */ \ - ENSURE_BITS(16); \ - /* immediate table lookup of [tablebits] bits of the code */ \ - sym = lzx->tbl##_table[PEEK_BITS(LZX_##tbl##_TABLEBITS)]; \ - /* is the symbol is longer than [tablebits] bits? (i=node index) */ \ - if (sym >= LZX_##tbl##_MAXSYMBOLS) { \ - /* decode remaining bits by tree traversal */ \ - i = 1 << (BITBUF_WIDTH - LZX_##tbl##_TABLEBITS); \ - do { \ - /* one less bit. error if we run out of bits before decode */ \ - i >>= 1; \ - if (i == 0) { \ - D(("out of bits in huffman decode")) \ - return lzx->error = MSPACK_ERR_DECRUNCH; \ - } \ - /* double node index and add 0 (left branch) or 1 (right) */ \ - sym <<= 1; sym |= (bit_buffer & i) ? 
1 : 0; \ - /* hop to next node index / decoded symbol */ \ - sym = lzx->tbl##_table[sym]; \ - /* while we are still in node indicies, not decoded symbols */ \ - } while (sym >= LZX_##tbl##_MAXSYMBOLS); \ - } \ - /* result */ \ - (var) = sym; \ - /* look up the code length of that symbol and discard those bits */ \ - i = lzx->tbl##_len[sym]; \ - REMOVE_BITS(i); \ -} while (0) +/* import huffman-reading macros and code */ +#define TABLEBITS(tbl) LZX_##tbl##_TABLEBITS +#define MAXSYMBOLS(tbl) LZX_##tbl##_MAXSYMBOLS +#define HUFF_TABLE(tbl,idx) lzx->tbl##_table[idx] +#define HUFF_LEN(tbl,idx) lzx->tbl##_len[idx] +#define HUFF_ERROR return lzx->error = MSPACK_ERR_DECRUNCH +#include /* BUILD_TABLE(tbl) builds a huffman lookup table from code lengths */ #define BUILD_TABLE(tbl) \ - if (make_decode_table(LZX_##tbl##_MAXSYMBOLS, LZX_##tbl##_TABLEBITS, \ - &lzx->tbl##_len[0], &lzx->tbl##_table[0])) \ - { \ - D(("failed to build %s table", #tbl)) \ - return lzx->error = MSPACK_ERR_DECRUNCH; \ - } - -/* make_decode_table(nsyms, nbits, length[], table[]) - * - * This function was coded by David Tritscher. It builds a fast huffman - * decoding table from a canonical huffman code lengths table. - * - * nsyms = total number of symbols in this huffman tree. - * nbits = any symbols with a code length of nbits or less can be decoded - * in one lookup of the table. - * length = A table to get code lengths from [0 to syms-1] - * table = The table to fill up with decoded symbols and pointers. 
- * - * Returns 0 for OK or 1 for error - */ - -static int make_decode_table(unsigned int nsyms, unsigned int nbits, - unsigned char *length, unsigned short *table) -{ - unsigned short sym; - unsigned int leaf, fill; - unsigned char bit_num; - unsigned int pos = 0; /* the current position in the decode table */ - unsigned int table_mask = 1 << nbits; - unsigned int bit_mask = table_mask >> 1; /* don't do 0 length codes */ - unsigned int next_symbol = bit_mask; /* base of allocation for long codes */ - - /* fill entries for codes short enough for a direct mapping */ - for (bit_num = 1; bit_num <= nbits; bit_num++) { - for (sym = 0; sym < nsyms; sym++) { - if (length[sym] != bit_num) continue; - leaf = pos; - if((pos += bit_mask) > table_mask) return 1; /* table overrun */ - /* fill all possible lookups of this symbol with the symbol itself */ - for (fill = bit_mask; fill-- > 0;) table[leaf++] = sym; + if (make_decode_table(MAXSYMBOLS(tbl), TABLEBITS(tbl), \ + &HUFF_LEN(tbl,0), &HUFF_TABLE(tbl,0))) \ + { \ + D(("failed to build %s table", #tbl)) \ + return lzx->error = MSPACK_ERR_DECRUNCH; \ } - bit_mask >>= 1; - } - - /* full table already? 
*/ - if (pos == table_mask) return 0; - - /* clear the remainder of the table */ - for (sym = pos; sym < table_mask; sym++) table[sym] = 0xFFFF; - - /* allow codes to be up to nbits+16 long, instead of nbits */ - pos <<= 16; - table_mask <<= 16; - bit_mask = 1 << 15; - - for (bit_num = nbits+1; bit_num <= 16; bit_num++) { - for (sym = 0; sym < nsyms; sym++) { - if (length[sym] != bit_num) continue; - - leaf = pos >> 16; - for (fill = 0; fill < bit_num - nbits; fill++) { - /* if this path hasn't been taken yet, 'allocate' two entries */ - if (table[leaf] == 0xFFFF) { - table[(next_symbol << 1)] = 0xFFFF; - table[(next_symbol << 1) + 1] = 0xFFFF; - table[leaf] = next_symbol++; - } - /* follow the path and select either left or right for next bit */ - leaf = table[leaf] << 1; - if ((pos >> (15-fill)) & 1) leaf++; - } - table[leaf] = sym; - - if ((pos += bit_mask) > table_mask) return 1; /* table overflow */ - } - bit_mask >>= 1; - } - - /* full table? */ - if (pos == table_mask) return 0; - - /* either erroneous table, or all elements are 0 - let's find out. */ - for (sym = 0; sym < nsyms; sym++) if (length[sym]) return 1; - return 0; -} +#define BUILD_TABLE_MAYBE_EMPTY(tbl) do { \ + lzx->tbl##_empty = 0; \ + if (make_decode_table(MAXSYMBOLS(tbl), TABLEBITS(tbl), \ + &HUFF_LEN(tbl,0), &HUFF_TABLE(tbl,0))) \ + { \ + for (i = 0; i < MAXSYMBOLS(tbl); i++) { \ + if (HUFF_LEN(tbl, i) > 0) { \ + D(("failed to build %s table", #tbl)) \ + return lzx->error = MSPACK_ERR_DECRUNCH; \ + } \ + } \ + /* empty tree - allow it, but don't decode symbols with it */ \ + lzx->tbl##_empty = 1; \ + } \ +} while (0) /* READ_LENGTHS(tablename, first, last) reads in code lengths for symbols * first to last in the given table. The code lengths are stored in their * own special LZX way. 
*/ -#define READ_LENGTHS(tbl, first, last) do { \ - STORE_BITS; \ - if (lzxd_read_lens(lzx, &lzx->tbl##_len[0], (first), \ - (unsigned int)(last))) return lzx->error; \ - RESTORE_BITS; \ +#define READ_LENGTHS(tbl, first, last) do { \ + STORE_BITS; \ + if (lzxd_read_lens(lzx, &HUFF_LEN(tbl, 0), (first), \ + (unsigned int)(last))) return lzx->error; \ + RESTORE_BITS; \ } while (0) static int lzxd_read_lens(struct lzxd_stream *lzx, unsigned char *lens, - unsigned int first, unsigned int last) + unsigned int first, unsigned int last) { /* bit buffer and huffman symbol decode variables */ unsigned int bit_buffer; @@ -348,27 +193,71 @@ static int lzxd_read_lens(struct lzxd_stream *lzx, unsigned char *lens, * a small 'position slot' number and a small offset from that slot are * encoded instead of one large offset. * + * The number of slots is decided by how many are needed to encode the + * largest offset for a given window size. This is easy when the gap between + * slots is less than 128Kb, it's a linear relationship. But when extra_bits + * reaches its limit of 17 (because LZX can only ensure reading 17 bits of + * data at a time), we can only jump 128Kb at a time and have to start + * using more and more position slots as each window size doubles. + * * position_base[] is an index to the position slot bases * * extra_bits[] states how many bits of offset-from-base data is needed. + * + * They are calculated as follows: + * extra_bits[i] = 0 where i < 4 + * extra_bits[i] = floor(i/2)-1 where i >= 4 && i < 36 + * extra_bits[i] = 17 where i >= 36 + * position_base[0] = 0 + * position_base[i] = position_base[i-1] + (1 << extra_bits[i-1]) */ -static unsigned int position_base[51]; -static unsigned char extra_bits[51]; - -static void lzxd_static_init() { - int i, j; - - for (i = 0, j = 0; i < 51; i += 2) { - extra_bits[i] = j; /* 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7... */ - extra_bits[i+1] = j; - if ((i != 0) && (j < 17)) j++; /* 0,0,1,2,3,4...15,16,17,17,17,17... 
*/ - } - - for (i = 0, j = 0; i < 51; i++) { - position_base[i] = j; /* 0,1,2,3,4,6,8,12,16,24,32,... */ - j += 1 << extra_bits[i]; /* 1,1,1,1,2,2,4,4,8,8,16,16,32,32,... */ - } -} +static const unsigned int position_slots[11] = { + 30, 32, 34, 36, 38, 42, 50, 66, 98, 162, 290 +}; +static const unsigned char extra_bits[36] = { + 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, + 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16 +}; +static const unsigned int position_base[290] = { + 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384, 512, + 768, 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576, 32768, + 49152, 65536, 98304, 131072, 196608, 262144, 393216, 524288, 655360, + 786432, 917504, 1048576, 1179648, 1310720, 1441792, 1572864, 1703936, + 1835008, 1966080, 2097152, 2228224, 2359296, 2490368, 2621440, 2752512, + 2883584, 3014656, 3145728, 3276800, 3407872, 3538944, 3670016, 3801088, + 3932160, 4063232, 4194304, 4325376, 4456448, 4587520, 4718592, 4849664, + 4980736, 5111808, 5242880, 5373952, 5505024, 5636096, 5767168, 5898240, + 6029312, 6160384, 6291456, 6422528, 6553600, 6684672, 6815744, 6946816, + 7077888, 7208960, 7340032, 7471104, 7602176, 7733248, 7864320, 7995392, + 8126464, 8257536, 8388608, 8519680, 8650752, 8781824, 8912896, 9043968, + 9175040, 9306112, 9437184, 9568256, 9699328, 9830400, 9961472, 10092544, + 10223616, 10354688, 10485760, 10616832, 10747904, 10878976, 11010048, + 11141120, 11272192, 11403264, 11534336, 11665408, 11796480, 11927552, + 12058624, 12189696, 12320768, 12451840, 12582912, 12713984, 12845056, + 12976128, 13107200, 13238272, 13369344, 13500416, 13631488, 13762560, + 13893632, 14024704, 14155776, 14286848, 14417920, 14548992, 14680064, + 14811136, 14942208, 15073280, 15204352, 15335424, 15466496, 15597568, + 15728640, 15859712, 15990784, 16121856, 16252928, 16384000, 16515072, + 16646144, 16777216, 16908288, 17039360, 17170432, 17301504, 17432576, + 17563648, 17694720, 
17825792, 17956864, 18087936, 18219008, 18350080, + 18481152, 18612224, 18743296, 18874368, 19005440, 19136512, 19267584, + 19398656, 19529728, 19660800, 19791872, 19922944, 20054016, 20185088, + 20316160, 20447232, 20578304, 20709376, 20840448, 20971520, 21102592, + 21233664, 21364736, 21495808, 21626880, 21757952, 21889024, 22020096, + 22151168, 22282240, 22413312, 22544384, 22675456, 22806528, 22937600, + 23068672, 23199744, 23330816, 23461888, 23592960, 23724032, 23855104, + 23986176, 24117248, 24248320, 24379392, 24510464, 24641536, 24772608, + 24903680, 25034752, 25165824, 25296896, 25427968, 25559040, 25690112, + 25821184, 25952256, 26083328, 26214400, 26345472, 26476544, 26607616, + 26738688, 26869760, 27000832, 27131904, 27262976, 27394048, 27525120, + 27656192, 27787264, 27918336, 28049408, 28180480, 28311552, 28442624, + 28573696, 28704768, 28835840, 28966912, 29097984, 29229056, 29360128, + 29491200, 29622272, 29753344, 29884416, 30015488, 30146560, 30277632, + 30408704, 30539776, 30670848, 30801920, 30932992, 31064064, 31195136, + 31326208, 31457280, 31588352, 31719424, 31850496, 31981568, 32112640, + 32243712, 32374784, 32505856, 32636928, 32768000, 32899072, 33030144, + 33161216, 33292288, 33423360 +}; static void lzxd_reset_state(struct lzxd_stream *lzx) { int i; @@ -388,35 +277,46 @@ static void lzxd_reset_state(struct lzxd_stream *lzx) { /*-------- main LZX code --------*/ struct lzxd_stream *lzxd_init(struct mspack_system *system, - struct mspack_file *input, - struct mspack_file *output, - int window_bits, - int reset_interval, - int input_buffer_size, - off_t output_length) + struct mspack_file *input, + struct mspack_file *output, + int window_bits, + int reset_interval, + int input_buffer_size, + off_t output_length, + char is_delta) { unsigned int window_size = 1 << window_bits; struct lzxd_stream *lzx; if (!system) return NULL; - /* LZX supports window sizes of 2^15 (32Kb) through 2^21 (2Mb) */ - if (window_bits < 15 || window_bits > 21) 
return NULL; + /* LZX DELTA window sizes are between 2^17 (128KiB) and 2^25 (32MiB), + * regular LZX windows are between 2^15 (32KiB) and 2^21 (2MiB) + */ + if (is_delta) { + if (window_bits < 17 || window_bits > 25) return NULL; + } + else { + if (window_bits < 15 || window_bits > 21) return NULL; + } + if (reset_interval < 0 || output_length < 0) { + D(("reset interval or output length < 0")) + return NULL; + } + + /* round up input buffer size to multiple of two */ input_buffer_size = (input_buffer_size + 1) & -2; - if (!input_buffer_size) return NULL; - - /* initialise static data */ - lzxd_static_init(); + if (input_buffer_size < 2) return NULL; /* allocate decompression state */ - if (!(lzx = (struct lzxd_stream *)system->alloc(system, sizeof(struct lzxd_stream)))) { + if (!(lzx = (struct lzxd_stream *) system->alloc(system, sizeof(struct lzxd_stream)))) { return NULL; } /* allocate decompression window and input buffer */ - lzx->window = (unsigned char *)system->alloc(system, (size_t) window_size); - lzx->inbuf = (unsigned char *)system->alloc(system, (size_t) input_buffer_size); + lzx->window = (unsigned char *) system->alloc(system, (size_t) window_size); + lzx->inbuf = (unsigned char *) system->alloc(system, (size_t) input_buffer_size); if (!lzx->window || !lzx->inbuf) { system->free(lzx->window); system->free(lzx->inbuf); @@ -433,43 +333,73 @@ struct lzxd_stream *lzxd_init(struct mspack_system *system, lzx->inbuf_size = input_buffer_size; lzx->window_size = 1 << window_bits; + lzx->ref_data_size = 0; lzx->window_posn = 0; lzx->frame_posn = 0; lzx->frame = 0; lzx->reset_interval = reset_interval; lzx->intel_filesize = 0; lzx->intel_curpos = 0; - - /* window bits: 15 16 17 18 19 20 21 - * position slots: 30 32 34 36 38 42 50 */ - lzx->posn_slots = ((window_bits == 21) ? 50 : - ((window_bits == 20) ? 
42 : (window_bits << 1))); lzx->intel_started = 0; - lzx->input_end = 0; + lzx->error = MSPACK_ERR_OK; + lzx->num_offsets = position_slots[window_bits - 15] << 3; + lzx->is_delta = is_delta; - lzx->error = MSPACK_ERR_OK; - - lzx->i_ptr = lzx->i_end = &lzx->inbuf[0]; lzx->o_ptr = lzx->o_end = &lzx->e8_buf[0]; - lzx->bit_buffer = lzx->bits_left = 0; - lzxd_reset_state(lzx); + INIT_BITS; return lzx; } +int lzxd_set_reference_data(struct lzxd_stream *lzx, + struct mspack_system *system, + struct mspack_file *input, + unsigned int length) +{ + if (!lzx) return MSPACK_ERR_ARGS; + + if (!lzx->is_delta) { + D(("only LZX DELTA streams support reference data")) + return MSPACK_ERR_ARGS; + } + if (lzx->offset) { + D(("too late to set reference data after decoding starts")) + return MSPACK_ERR_ARGS; + } + if (length > lzx->window_size) { + D(("reference length (%u) is longer than the window", length)) + return MSPACK_ERR_ARGS; + } + if (length > 0 && (!system || !input)) { + D(("length > 0 but no system or input")) + return MSPACK_ERR_ARGS; + } + + lzx->ref_data_size = length; + if (length > 0) { + /* copy reference data */ + unsigned char *pos = &lzx->window[lzx->window_size - length]; + int bytes = system->read(input, pos, length); + /* length can't be more than 2^25, so no signedness problem */ + if (bytes < (int)length) return MSPACK_ERR_READ; + } + lzx->ref_data_size = length; + return MSPACK_ERR_OK; +} + void lzxd_set_output_length(struct lzxd_stream *lzx, off_t out_bytes) { - if (lzx) lzx->length = out_bytes; + if (lzx && out_bytes > 0) lzx->length = out_bytes; } int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) { - /* bitstream reading and huffman variables */ + /* bitstream and huffman reading variables */ unsigned int bit_buffer; int bits_left, i=0; - unsigned short sym; unsigned char *i_ptr, *i_end; + unsigned short sym; int match_length, length_footer, extra, verbatim_bits, bytes_todo; - int this_run, main_element, aligned_bits, j; + int this_run, 
main_element, aligned_bits, j, warned = 0; unsigned char *window, *runsrc, *rundest, buf[12]; unsigned int frame_size=0, end_frame, match_offset, window_posn; unsigned int R0, R1, R2; @@ -505,12 +435,25 @@ int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) { /* have we reached the reset interval? (if there is one?) */ if (lzx->reset_interval && ((lzx->frame % lzx->reset_interval) == 0)) { if (lzx->block_remaining) { - D(("%d bytes remaining at reset interval", lzx->block_remaining)) - return lzx->error = MSPACK_ERR_DECRUNCH; + /* this is a file format error, we can make a best effort to extract what we can */ + D(("%d bytes remaining at reset interval", lzx->block_remaining)) + if (!warned) { + lzx->sys->message(NULL, "WARNING; invalid reset interval detected during LZX decompression"); + warned++; + } } /* re-read the intel header and reset the huffman lengths */ lzxd_reset_state(lzx); + R0 = lzx->R0; + R1 = lzx->R1; + R2 = lzx->R2; + } + + /* LZX DELTA format has chunk_size, not present in LZX format */ + if (lzx->is_delta) { + ENSURE_BITS(16); + REMOVE_BITS(16); } /* read header if necessary */ @@ -527,7 +470,7 @@ int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) { * has been filled in. 
*/ frame_size = LZX_FRAME_SIZE; if (lzx->length && (lzx->length - lzx->offset) < (off_t)frame_size) { - frame_size = (unsigned int)(lzx->length - lzx->offset); + frame_size = lzx->length - lzx->offset; } /* decode until one more frame is available */ @@ -535,70 +478,61 @@ int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) { while (bytes_todo > 0) { /* initialise new block, if one is needed */ if (lzx->block_remaining == 0) { - /* realign if previous block was an odd-sized UNCOMPRESSED block */ - if ((lzx->block_type == LZX_BLOCKTYPE_UNCOMPRESSED) && - (lzx->block_length & 1)) - { - if (i_ptr == i_end) { - if (lzxd_read_input(lzx)) return lzx->error; - i_ptr = lzx->i_ptr; - i_end = lzx->i_end; - } - i_ptr++; - } + /* realign if previous block was an odd-sized UNCOMPRESSED block */ + if ((lzx->block_type == LZX_BLOCKTYPE_UNCOMPRESSED) && + (lzx->block_length & 1)) + { + READ_IF_NEEDED; + i_ptr++; + } - /* read block type (3 bits) and block length (24 bits) */ - READ_BITS(lzx->block_type, 3); - READ_BITS(i, 16); READ_BITS(j, 8); - lzx->block_remaining = lzx->block_length = (i << 8) | j; - /*D(("new block t%d len %u", lzx->block_type, lzx->block_length))*/ + /* read block type (3 bits) and block length (24 bits) */ + READ_BITS(lzx->block_type, 3); + READ_BITS(i, 16); READ_BITS(j, 8); + lzx->block_remaining = lzx->block_length = (i << 8) | j; + /*D(("new block t%d len %u", lzx->block_type, lzx->block_length))*/ - /* read individual block headers */ - switch (lzx->block_type) { - case LZX_BLOCKTYPE_ALIGNED: - /* read lengths of and build aligned huffman decoding tree */ - for (i = 0; i < 8; i++) { READ_BITS(j, 3); lzx->ALIGNED_len[i] = j; } - BUILD_TABLE(ALIGNED); - /* no break -- rest of aligned header is same as verbatim */ - case LZX_BLOCKTYPE_VERBATIM: - /* read lengths of and build main huffman decoding tree */ - READ_LENGTHS(MAINTREE, 0, 256); - READ_LENGTHS(MAINTREE, 256, LZX_NUM_CHARS + (lzx->posn_slots << 3)); - BUILD_TABLE(MAINTREE); - /* if the 
literal 0xE8 is anywhere in the block... */ - if (lzx->MAINTREE_len[0xE8] != 0) lzx->intel_started = 1; - /* read lengths of and build lengths huffman decoding tree */ - READ_LENGTHS(LENGTH, 0, LZX_NUM_SECONDARY_LENGTHS); - BUILD_TABLE(LENGTH); - break; + /* read individual block headers */ + switch (lzx->block_type) { + case LZX_BLOCKTYPE_ALIGNED: + /* read lengths of and build aligned huffman decoding tree */ + for (i = 0; i < 8; i++) { READ_BITS(j, 3); lzx->ALIGNED_len[i] = j; } + BUILD_TABLE(ALIGNED); + /* rest of aligned header is same as verbatim */ /*@fallthrough@*/ + case LZX_BLOCKTYPE_VERBATIM: + /* read lengths of and build main huffman decoding tree */ + READ_LENGTHS(MAINTREE, 0, 256); + READ_LENGTHS(MAINTREE, 256, LZX_NUM_CHARS + lzx->num_offsets); + BUILD_TABLE(MAINTREE); + /* if the literal 0xE8 is anywhere in the block... */ + if (lzx->MAINTREE_len[0xE8] != 0) lzx->intel_started = 1; + /* read lengths of and build lengths huffman decoding tree */ + READ_LENGTHS(LENGTH, 0, LZX_NUM_SECONDARY_LENGTHS); + BUILD_TABLE_MAYBE_EMPTY(LENGTH); + break; - case LZX_BLOCKTYPE_UNCOMPRESSED: - /* because we can't assume otherwise */ - lzx->intel_started = 1; + case LZX_BLOCKTYPE_UNCOMPRESSED: + /* because we can't assume otherwise */ + lzx->intel_started = 1; - /* read 1-16 (not 0-15) bits to align to bytes */ - ENSURE_BITS(16); - if (bits_left > 16) i_ptr -= 2; - bits_left = 0; bit_buffer = 0; + /* read 1-16 (not 0-15) bits to align to bytes */ + if (bits_left == 0) ENSURE_BITS(16); + bits_left = 0; bit_buffer = 0; - /* read 12 bytes of stored R0 / R1 / R2 values */ - for (rundest = &buf[0], i = 0; i < 12; i++) { - if (i_ptr == i_end) { - if (lzxd_read_input(lzx)) return lzx->error; - i_ptr = lzx->i_ptr; - i_end = lzx->i_end; - } - *rundest++ = *i_ptr++; - } - R0 = buf[0] | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24); - R1 = buf[4] | (buf[5] << 8) | (buf[6] << 16) | (buf[7] << 24); - R2 = buf[8] | (buf[9] << 8) | (buf[10] << 16) | (buf[11] << 24); - break; + 
/* read 12 bytes of stored R0 / R1 / R2 values */ + for (rundest = &buf[0], i = 0; i < 12; i++) { + READ_IF_NEEDED; + *rundest++ = *i_ptr++; + } + R0 = buf[0] | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24); + R1 = buf[4] | (buf[5] << 8) | (buf[6] << 16) | (buf[7] << 24); + R2 = buf[8] | (buf[9] << 8) | (buf[10] << 16) | (buf[11] << 24); + break; - default: - D(("bad block type")) - return lzx->error = MSPACK_ERR_DECRUNCH; - } + default: + D(("bad block type")) + return lzx->error = MSPACK_ERR_DECRUNCH; + } } /* decode more of the block: @@ -613,202 +547,270 @@ int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) { /* decode at least this_run bytes */ switch (lzx->block_type) { case LZX_BLOCKTYPE_VERBATIM: - while (this_run > 0) { - READ_HUFFSYM(MAINTREE, main_element); - if (main_element < LZX_NUM_CHARS) { - /* literal: 0 to LZX_NUM_CHARS-1 */ - window[window_posn++] = main_element; - this_run--; - } - else { - /* match: LZX_NUM_CHARS + ((slot<<3) | length_header (3 bits)) */ - main_element -= LZX_NUM_CHARS; + while (this_run > 0) { + READ_HUFFSYM(MAINTREE, main_element); + if (main_element < LZX_NUM_CHARS) { + /* literal: 0 to LZX_NUM_CHARS-1 */ + window[window_posn++] = main_element; + this_run--; + } + else { + /* match: LZX_NUM_CHARS + ((slot<<3) | length_header (3 bits)) */ + main_element -= LZX_NUM_CHARS; - /* get match length */ - match_length = main_element & LZX_NUM_PRIMARY_LENGTHS; - if (match_length == LZX_NUM_PRIMARY_LENGTHS) { - READ_HUFFSYM(LENGTH, length_footer); - match_length += length_footer; - } - match_length += LZX_MIN_MATCH; - - /* get match offset */ - switch ((match_offset = (main_element >> 3))) { - case 0: match_offset = R0; break; - case 1: match_offset = R1; R1=R0; R0 = match_offset; break; - case 2: match_offset = R2; R2=R0; R0 = match_offset; break; - case 3: match_offset = 1; R2=R1; R1=R0; R0 = match_offset; break; - default: - extra = extra_bits[match_offset]; - READ_BITS(verbatim_bits, extra); - match_offset = 
position_base[match_offset] - 2 + verbatim_bits; - R2 = R1; R1 = R0; R0 = match_offset; - } + /* get match length */ + match_length = main_element & LZX_NUM_PRIMARY_LENGTHS; + if (match_length == LZX_NUM_PRIMARY_LENGTHS) { + if (lzx->LENGTH_empty) { + D(("LENGTH symbol needed but tree is empty")) + return lzx->error = MSPACK_ERR_DECRUNCH; + } + READ_HUFFSYM(LENGTH, length_footer); + match_length += length_footer; + } + match_length += LZX_MIN_MATCH; - if ((window_posn + match_length) > lzx->window_size) { - D(("match ran over window wrap")) - return lzx->error = MSPACK_ERR_DECRUNCH; - } - - /* copy match */ - rundest = &window[window_posn]; - i = match_length; - /* does match offset wrap the window? */ - if (match_offset > window_posn) { - /* j = length from match offset to end of window */ - j = match_offset - window_posn; - if (j > (int) lzx->window_size) { - D(("match offset beyond window boundaries")) - return lzx->error = MSPACK_ERR_DECRUNCH; - } - runsrc = &window[lzx->window_size - j]; - if (j < i) { - /* if match goes over the window edge, do two copy runs */ - i -= j; while (j-- > 0) *rundest++ = *runsrc++; - runsrc = window; - } - while (i-- > 0) *rundest++ = *runsrc++; - } - else { - runsrc = rundest - match_offset; - while (i-- > 0) *rundest++ = *runsrc++; - } + /* get match offset */ + switch ((match_offset = (main_element >> 3))) { + case 0: match_offset = R0; break; + case 1: match_offset = R1; R1=R0; R0 = match_offset; break; + case 2: match_offset = R2; R2=R0; R0 = match_offset; break; + case 3: match_offset = 1; R2=R1; R1=R0; R0 = match_offset; break; + default: + extra = (match_offset >= 36) ? 
17 : extra_bits[match_offset]; + READ_BITS(verbatim_bits, extra); + match_offset = position_base[match_offset] - 2 + verbatim_bits; + R2 = R1; R1 = R0; R0 = match_offset; + } - this_run -= match_length; - window_posn += match_length; - } - } /* while (this_run > 0) */ - break; + /* LZX DELTA uses max match length to signal even longer match */ + if (match_length == LZX_MAX_MATCH && lzx->is_delta) { + int extra_len = 0; + ENSURE_BITS(3); /* 4 entry huffman tree */ + if (PEEK_BITS(1) == 0) { + REMOVE_BITS(1); /* '0' -> 8 extra length bits */ + READ_BITS(extra_len, 8); + } + else if (PEEK_BITS(2) == 2) { + REMOVE_BITS(2); /* '10' -> 10 extra length bits + 0x100 */ + READ_BITS(extra_len, 10); + extra_len += 0x100; + } + else if (PEEK_BITS(3) == 6) { + REMOVE_BITS(3); /* '110' -> 12 extra length bits + 0x500 */ + READ_BITS(extra_len, 12); + extra_len += 0x500; + } + else { + REMOVE_BITS(3); /* '111' -> 15 extra length bits */ + READ_BITS(extra_len, 15); + } + match_length += extra_len; + } + + if ((window_posn + match_length) > lzx->window_size) { + D(("match ran over window wrap")) + return lzx->error = MSPACK_ERR_DECRUNCH; + } + + /* copy match */ + rundest = &window[window_posn]; + i = match_length; + /* does match offset wrap the window? 
*/ + if (match_offset > window_posn) { + if ((off_t)match_offset > lzx->offset && + (match_offset - window_posn) > lzx->ref_data_size) + { + D(("match offset beyond LZX stream")) + return lzx->error = MSPACK_ERR_DECRUNCH; + } + /* j = length from match offset to end of window */ + j = match_offset - window_posn; + if (j > (int) lzx->window_size) { + D(("match offset beyond window boundaries")) + return lzx->error = MSPACK_ERR_DECRUNCH; + } + runsrc = &window[lzx->window_size - j]; + if (j < i) { + /* if match goes over the window edge, do two copy runs */ + i -= j; while (j-- > 0) *rundest++ = *runsrc++; + runsrc = window; + } + while (i-- > 0) *rundest++ = *runsrc++; + } + else { + runsrc = rundest - match_offset; + while (i-- > 0) *rundest++ = *runsrc++; + } + + this_run -= match_length; + window_posn += match_length; + } + } /* while (this_run > 0) */ + break; case LZX_BLOCKTYPE_ALIGNED: - while (this_run > 0) { - READ_HUFFSYM(MAINTREE, main_element); - if (main_element < LZX_NUM_CHARS) { - /* literal: 0 to LZX_NUM_CHARS-1 */ - window[window_posn++] = main_element; - this_run--; - } - else { - /* match: LZX_NUM_CHARS + ((slot<<3) | length_header (3 bits)) */ - main_element -= LZX_NUM_CHARS; + while (this_run > 0) { + READ_HUFFSYM(MAINTREE, main_element); + if (main_element < LZX_NUM_CHARS) { + /* literal: 0 to LZX_NUM_CHARS-1 */ + window[window_posn++] = main_element; + this_run--; + } + else { + /* match: LZX_NUM_CHARS + ((slot<<3) | length_header (3 bits)) */ + main_element -= LZX_NUM_CHARS; - /* get match length */ - match_length = main_element & LZX_NUM_PRIMARY_LENGTHS; - if (match_length == LZX_NUM_PRIMARY_LENGTHS) { - READ_HUFFSYM(LENGTH, length_footer); - match_length += length_footer; - } - match_length += LZX_MIN_MATCH; + /* get match length */ + match_length = main_element & LZX_NUM_PRIMARY_LENGTHS; + if (match_length == LZX_NUM_PRIMARY_LENGTHS) { + if (lzx->LENGTH_empty) { + D(("LENGTH symbol needed but tree is empty")) + return lzx->error = 
MSPACK_ERR_DECRUNCH; + } + READ_HUFFSYM(LENGTH, length_footer); + match_length += length_footer; + } + match_length += LZX_MIN_MATCH; - /* get match offset */ - switch ((match_offset = (main_element >> 3))) { - case 0: match_offset = R0; break; - case 1: match_offset = R1; R1 = R0; R0 = match_offset; break; - case 2: match_offset = R2; R2 = R0; R0 = match_offset; break; - default: - extra = extra_bits[match_offset]; - match_offset = position_base[match_offset] - 2; - if (extra > 3) { - /* verbatim and aligned bits */ - extra -= 3; - READ_BITS(verbatim_bits, extra); - match_offset += (verbatim_bits << 3); - READ_HUFFSYM(ALIGNED, aligned_bits); - match_offset += aligned_bits; - } - else if (extra == 3) { - /* aligned bits only */ - READ_HUFFSYM(ALIGNED, aligned_bits); - match_offset += aligned_bits; - } - else if (extra > 0) { /* extra==1, extra==2 */ - /* verbatim bits only */ - READ_BITS(verbatim_bits, extra); - match_offset += verbatim_bits; - } - else /* extra == 0 */ { - /* ??? not defined in LZX specification! */ - match_offset = 1; - } - /* update repeated offset LRU queue */ - R2 = R1; R1 = R0; R0 = match_offset; - } + /* get match offset */ + switch ((match_offset = (main_element >> 3))) { + case 0: match_offset = R0; break; + case 1: match_offset = R1; R1 = R0; R0 = match_offset; break; + case 2: match_offset = R2; R2 = R0; R0 = match_offset; break; + default: + extra = (match_offset >= 36) ? 
17 : extra_bits[match_offset]; + match_offset = position_base[match_offset] - 2; + if (extra > 3) { + /* verbatim and aligned bits */ + extra -= 3; + READ_BITS(verbatim_bits, extra); + match_offset += (verbatim_bits << 3); + READ_HUFFSYM(ALIGNED, aligned_bits); + match_offset += aligned_bits; + } + else if (extra == 3) { + /* aligned bits only */ + READ_HUFFSYM(ALIGNED, aligned_bits); + match_offset += aligned_bits; + } + else if (extra > 0) { /* extra==1, extra==2 */ + /* verbatim bits only */ + READ_BITS(verbatim_bits, extra); + match_offset += verbatim_bits; + } + else /* extra == 0 */ { + /* ??? not defined in LZX specification! */ + match_offset = 1; + } + /* update repeated offset LRU queue */ + R2 = R1; R1 = R0; R0 = match_offset; + } - if ((window_posn + match_length) > lzx->window_size) { - D(("match ran over window wrap")) - return lzx->error = MSPACK_ERR_DECRUNCH; - } + /* LZX DELTA uses max match length to signal even longer match */ + if (match_length == LZX_MAX_MATCH && lzx->is_delta) { + int extra_len = 0; + ENSURE_BITS(3); /* 4 entry huffman tree */ + if (PEEK_BITS(1) == 0) { + REMOVE_BITS(1); /* '0' -> 8 extra length bits */ + READ_BITS(extra_len, 8); + } + else if (PEEK_BITS(2) == 2) { + REMOVE_BITS(2); /* '10' -> 10 extra length bits + 0x100 */ + READ_BITS(extra_len, 10); + extra_len += 0x100; + } + else if (PEEK_BITS(3) == 6) { + REMOVE_BITS(3); /* '110' -> 12 extra length bits + 0x500 */ + READ_BITS(extra_len, 12); + extra_len += 0x500; + } + else { + REMOVE_BITS(3); /* '111' -> 15 extra length bits */ + READ_BITS(extra_len, 15); + } + match_length += extra_len; + } - /* copy match */ - rundest = &window[window_posn]; - i = match_length; - /* does match offset wrap the window? 
*/ - if (match_offset > window_posn) { - /* j = length from match offset to end of window */ - j = match_offset - window_posn; - if (j > (int) lzx->window_size) { - D(("match offset beyond window boundaries")) - return lzx->error = MSPACK_ERR_DECRUNCH; - } - runsrc = &window[lzx->window_size - j]; - if (j < i) { - /* if match goes over the window edge, do two copy runs */ - i -= j; while (j-- > 0) *rundest++ = *runsrc++; - runsrc = window; - } - while (i-- > 0) *rundest++ = *runsrc++; - } - else { - runsrc = rundest - match_offset; - while (i-- > 0) *rundest++ = *runsrc++; - } + if ((window_posn + match_length) > lzx->window_size) { + D(("match ran over window wrap")) + return lzx->error = MSPACK_ERR_DECRUNCH; + } - this_run -= match_length; - window_posn += match_length; - } - } /* while (this_run > 0) */ - break; + /* copy match */ + rundest = &window[window_posn]; + i = match_length; + /* does match offset wrap the window? */ + if (match_offset > window_posn) { + if ((off_t)match_offset > lzx->offset && + (match_offset - window_posn) > lzx->ref_data_size) + { + D(("match offset beyond LZX stream")) + return lzx->error = MSPACK_ERR_DECRUNCH; + } + /* j = length from match offset to end of window */ + j = match_offset - window_posn; + if (j > (int) lzx->window_size) { + D(("match offset beyond window boundaries")) + return lzx->error = MSPACK_ERR_DECRUNCH; + } + runsrc = &window[lzx->window_size - j]; + if (j < i) { + /* if match goes over the window edge, do two copy runs */ + i -= j; while (j-- > 0) *rundest++ = *runsrc++; + runsrc = window; + } + while (i-- > 0) *rundest++ = *runsrc++; + } + else { + runsrc = rundest - match_offset; + while (i-- > 0) *rundest++ = *runsrc++; + } + + this_run -= match_length; + window_posn += match_length; + } + } /* while (this_run > 0) */ + break; case LZX_BLOCKTYPE_UNCOMPRESSED: - /* as this_run is limited not to wrap a frame, this also means it - * won't wrap the window (as the window is a multiple of 32k) */ - rundest = 
&window[window_posn]; - window_posn += this_run; - while (this_run > 0) { - if ((i = (int)(i_end - i_ptr))) { - if (i > this_run) i = this_run; - lzx->sys->copy(i_ptr, rundest, (size_t) i); - rundest += i; - i_ptr += i; - this_run -= i; - } - else { - if (lzxd_read_input(lzx)) return lzx->error; - i_ptr = lzx->i_ptr; - i_end = lzx->i_end; - } - } - break; + /* as this_run is limited not to wrap a frame, this also means it + * won't wrap the window (as the window is a multiple of 32k) */ + rundest = &window[window_posn]; + window_posn += this_run; + while (this_run > 0) { + if ((i = (int)(i_end - i_ptr)) == 0) { + READ_IF_NEEDED; + } + else { + if (i > this_run) i = this_run; + lzx->sys->copy(i_ptr, rundest, (size_t) i); + rundest += i; + i_ptr += i; + this_run -= i; + } + } + break; default: - return lzx->error = MSPACK_ERR_DECRUNCH; /* might as well */ + return lzx->error = MSPACK_ERR_DECRUNCH; /* might as well */ } /* did the final match overrun our desired this_run length? */ if (this_run < 0) { - if ((unsigned int)(-this_run) > lzx->block_remaining) { - D(("overrun went past end of block by %d (%d remaining)", - -this_run, lzx->block_remaining )) - return lzx->error = MSPACK_ERR_DECRUNCH; - } - lzx->block_remaining -= -this_run; + if ((unsigned int)(-this_run) > lzx->block_remaining) { + D(("overrun went past end of block by %d (%d remaining)", + -this_run, lzx->block_remaining )) + return lzx->error = MSPACK_ERR_DECRUNCH; + } + lzx->block_remaining -= -this_run; } } /* while (bytes_todo > 0) */ /* streams don't extend over frame boundaries */ if ((window_posn - lzx->frame_posn) != frame_size) { D(("decode beyond output frame limits! 
%d != %d", - window_posn - lzx->frame_posn, frame_size)) + window_posn - lzx->frame_posn, frame_size)) return lzx->error = MSPACK_ERR_DECRUNCH; } @@ -818,13 +820,14 @@ int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) { /* check that we've used all of the previous frame first */ if (lzx->o_ptr != lzx->o_end) { - D(("%d avail bytes, new %d frame", lzx->o_end-lzx->o_ptr, frame_size)) + D(("%ld avail bytes, new %d frame", + (long)(lzx->o_end - lzx->o_ptr), frame_size)) return lzx->error = MSPACK_ERR_DECRUNCH; } /* does this intel block _really_ need decoding? */ if (lzx->intel_started && lzx->intel_filesize && - (lzx->frame <= 32768) && (frame_size > 10)) + (lzx->frame <= 32768) && (frame_size > 10)) { unsigned char *data = &lzx->e8_buf[0]; unsigned char *dataend = &lzx->e8_buf[frame_size - 10]; @@ -837,17 +840,17 @@ int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) { lzx->sys->copy(&lzx->window[lzx->frame_posn], data, frame_size); while (data < dataend) { - if (*data++ != 0xE8) { curpos++; continue; } - abs_off = data[0] | (data[1]<<8) | (data[2]<<16) | (data[3]<<24); - if ((abs_off >= -curpos) && (abs_off < filesize)) { - rel_off = (abs_off >= 0) ? abs_off - curpos : abs_off + filesize; - data[0] = (unsigned char) rel_off; - data[1] = (unsigned char) (rel_off >> 8); - data[2] = (unsigned char) (rel_off >> 16); - data[3] = (unsigned char) (rel_off >> 24); - } - data += 4; - curpos += 5; + if (*data++ != 0xE8) { curpos++; continue; } + abs_off = data[0] | (data[1]<<8) | (data[2]<<16) | (data[3]<<24); + if ((abs_off >= -curpos) && (abs_off < filesize)) { + rel_off = (abs_off >= 0) ? 
abs_off - curpos : abs_off + filesize; + data[0] = (unsigned char) rel_off; + data[1] = (unsigned char) (rel_off >> 8); + data[2] = (unsigned char) (rel_off >> 16); + data[3] = (unsigned char) (rel_off >> 24); + } + data += 4; + curpos += 5; } lzx->intel_curpos += frame_size; } diff --git a/third_party/mspack/mspack.h b/third_party/mspack/mspack.h index 0d2584dee..f9161f983 100644 --- a/third_party/mspack/mspack.h +++ b/third_party/mspack/mspack.h @@ -1,5 +1,5 @@ /* libmspack -- a library for working with Microsoft compression formats. - * (C) 2003-2004 Stuart Caie + * (C) 2003-2016 Stuart Caie * * libmspack is free software; you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License (LGPL) version 2.1 @@ -21,6 +21,79 @@ * libmspack is a library which provides compressors and decompressors, * archivers and dearchivers for Microsoft compression formats. * + * \section formats Formats supported + * + * The following file formats are supported: + * - SZDD files, which use LZSS compression + * - KWAJ files, which use LZSS, LZSS+Huffman or deflate compression + * - .HLP (MS Help) files, which use LZSS compression + * - .CAB (MS Cabinet) files, which use deflate, LZX or Quantum compression + * - .CHM (HTML Help) files, which use LZX compression + * - .LIT (MS EBook) files, which use LZX compression and DES encryption + * - .LZX (Exchange Offline Addressbook) files, which use LZX compression + * + * To determine the capabilities of the library, and the binary + * compatibility version of any particular compressor or decompressor, use + * the mspack_version() function. The UNIX library interface version is + * defined as the highest-versioned library component. + * + * \section starting Getting started + * + * The macro MSPACK_SYS_SELFTEST() should be used to ensure the library can + * be used. In particular, it checks if the caller is using 32-bit file I/O + * when the library is compiled for 64-bit file I/O and vice versa. 
+ * + * If compiled normally, the library includes basic file I/O and memory + * management functionality using the standard C library. This can be + * customised and replaced entirely by creating a mspack_system structure. + * + * A compressor or decompressor for the required format must be + * instantiated before it can be used. Each construction function takes + * one parameter, which is either a pointer to a custom mspack_system + * structure, or NULL to use the default. The instantiation returned, if + * not NULL, contains function pointers (methods) to work with the given + * file format. + * + * For compression: + * - mspack_create_cab_compressor() creates a mscab_compressor + * - mspack_create_chm_compressor() creates a mschm_compressor + * - mspack_create_lit_compressor() creates a mslit_compressor + * - mspack_create_hlp_compressor() creates a mshlp_compressor + * - mspack_create_szdd_compressor() creates a msszdd_compressor + * - mspack_create_kwaj_compressor() creates a mskwaj_compressor + * - mspack_create_oab_compressor() creates a msoab_compressor + * + * For decompression: + * - mspack_create_cab_decompressor() creates a mscab_decompressor + * - mspack_create_chm_decompressor() creates a mschm_decompressor + * - mspack_create_lit_decompressor() creates a mslit_decompressor + * - mspack_create_hlp_decompressor() creates a mshlp_decompressor + * - mspack_create_szdd_decompressor() creates a msszdd_decompressor + * - mspack_create_kwaj_decompressor() creates a mskwaj_decompressor + * - mspack_create_oab_decompressor() creates a msoab_decompressor + * + * Once finished working with a format, each kind of + * compressor/decompressor has its own specific destructor: + * - mspack_destroy_cab_compressor() + * - mspack_destroy_cab_decompressor() + * - mspack_destroy_chm_compressor() + * - mspack_destroy_chm_decompressor() + * - mspack_destroy_lit_compressor() + * - mspack_destroy_lit_decompressor() + * - mspack_destroy_hlp_compressor() + * - 
mspack_destroy_hlp_decompressor() + * - mspack_destroy_szdd_compressor() + * - mspack_destroy_szdd_decompressor() + * - mspack_destroy_kwaj_compressor() + * - mspack_destroy_kwaj_decompressor() + * - mspack_destroy_oab_compressor() + * - mspack_destroy_oab_decompressor() + * + * Destroying a compressor or decompressor does not destroy any objects, + * structures or handles that have been created using that compressor or + * decompressor. Ensure that everything created or opened is destroyed or + * closed before compressor/decompressor is itself destroyed. + * * \section errors Error codes * * All compressors and decompressors use the same set of error codes. Most @@ -45,6 +118,41 @@ * - #MSPACK_ERR_CHECKSUM indicates that a data checksum has failed. * - #MSPACK_ERR_CRUNCH indicates an error occured during compression. * - #MSPACK_ERR_DECRUNCH indicates an error occured during decompression. + * + * \section threading Multi-threading + * + * libmspack methods are reentrant and multithreading-safe when each + * thread has its own compressor or decompressor. + + * You should not call multiple methods simultaneously on a single + * compressor or decompressor instance. + * + * If this may happen, you can either use one compressor or + * decompressor per thread, or you can use your preferred lock, + * semaphore or mutex library to ensure no more than one method on a + * compressor/decompressor is called simultaneously. libmspack will + * not do this locking for you. 
+ * + * Example of incorrect behaviour: + * - thread 1 calls mspack_create_cab_decompressor() + * - thread 1 calls open() + * - thread 1 calls extract() for one file + * - thread 2 simultaneously calls extract() for another file + * + * Correct behaviour: + * - thread 1 calls mspack_create_cab_decompressor() + * - thread 2 calls mspack_create_cab_decompressor() + * - thread 1 calls its own open() / extract() + * - thread 2 simultaneously calls its own open() / extract() + * + * Also correct behaviour: + * - thread 1 calls mspack_create_cab_decompressor() + * - thread 1 locks a mutex for use with the decompressor before + * calling any methods on it, and unlocks the mutex after each + * method returns. + * - thread 1 can share the results of open() with thread 2, and both + * can call extract(), provided they both guard against simultaneous + * use of extract(), and any other methods, with the mutex */ #ifndef LIB_MSPACK_H @@ -57,6 +165,102 @@ extern "C" { #include <sys/types.h> #include <stdlib.h> +/** + * System self-test function, to ensure both library and calling program + * can use one another. + * + * A result of MSPACK_ERR_OK means the library and caller are + * compatible. Any other result indicates that the library and caller are + * not compatible and should not be used. In particular, a value of + * MSPACK_ERR_SEEK means the library and caller use different off_t + * datatypes. + * + * It should be used like so: + * + * @code + * int selftest_result; + * MSPACK_SYS_SELFTEST(selftest_result); + * if (selftest_result != MSPACK_ERR_OK) { + * fprintf(stderr, "incompatible with this build of libmspack\n"); + * exit(0); + * } + * @endcode + * + * @param result an int variable to store the result of the self-test + */ +#define MSPACK_SYS_SELFTEST(result) do { \ + (result) = mspack_sys_selftest_internal(sizeof(off_t)); \ +} while (0) + +/** Part of the MSPACK_SYS_SELFTEST() macro, must not be used directly.
*/ +extern int mspack_sys_selftest_internal(int); + +/** + * Enquire about the binary compatibility version of a specific interface in + * the library. Currently, the following interfaces are defined: + * + * - #MSPACK_VER_LIBRARY: the overall library + * - #MSPACK_VER_SYSTEM: the mspack_system interface + * - #MSPACK_VER_MSCABD: the mscab_decompressor interface + * - #MSPACK_VER_MSCABC: the mscab_compressor interface + * - #MSPACK_VER_MSCHMD: the mschm_decompressor interface + * - #MSPACK_VER_MSCHMC: the mschm_compressor interface + * - #MSPACK_VER_MSLITD: the mslit_decompressor interface + * - #MSPACK_VER_MSLITC: the mslit_compressor interface + * - #MSPACK_VER_MSHLPD: the mshlp_decompressor interface + * - #MSPACK_VER_MSHLPC: the mshlp_compressor interface + * - #MSPACK_VER_MSSZDDD: the msszdd_decompressor interface + * - #MSPACK_VER_MSSZDDC: the msszdd_compressor interface + * - #MSPACK_VER_MSKWAJD: the mskwaj_decompressor interface + * - #MSPACK_VER_MSKWAJC: the mskwaj_compressor interface + * - #MSPACK_VER_MSOABD: the msoab_decompressor interface + * - #MSPACK_VER_MSOABC: the msoab_compressor interface + * + * The result of the function should be interpreted as follows: + * - -1: this interface is completely unknown to the library + * - 0: this interface is known, but non-functioning + * - 1: this interface has all basic functionality + * - 2, 3, ...: this interface has additional functionality, clearly marked + * in the documentation as "version 2", "version 3" and so on. 
+ * + * @param entity the interface to request current version of + * @return the version of the requested interface + */ +extern int mspack_version(int entity); + +/** Pass to mspack_version() to get the overall library version */ +#define MSPACK_VER_LIBRARY (0) +/** Pass to mspack_version() to get the mspack_system version */ +#define MSPACK_VER_SYSTEM (1) +/** Pass to mspack_version() to get the mscab_decompressor version */ +#define MSPACK_VER_MSCABD (2) +/** Pass to mspack_version() to get the mscab_compressor version */ +#define MSPACK_VER_MSCABC (3) +/** Pass to mspack_version() to get the mschm_decompressor version */ +#define MSPACK_VER_MSCHMD (4) +/** Pass to mspack_version() to get the mschm_compressor version */ +#define MSPACK_VER_MSCHMC (5) +/** Pass to mspack_version() to get the mslit_decompressor version */ +#define MSPACK_VER_MSLITD (6) +/** Pass to mspack_version() to get the mslit_compressor version */ +#define MSPACK_VER_MSLITC (7) +/** Pass to mspack_version() to get the mshlp_decompressor version */ +#define MSPACK_VER_MSHLPD (8) +/** Pass to mspack_version() to get the mshlp_compressor version */ +#define MSPACK_VER_MSHLPC (9) +/** Pass to mspack_version() to get the msszdd_decompressor version */ +#define MSPACK_VER_MSSZDDD (10) +/** Pass to mspack_version() to get the msszdd_compressor version */ +#define MSPACK_VER_MSSZDDC (11) +/** Pass to mspack_version() to get the mskwaj_decompressor version */ +#define MSPACK_VER_MSKWAJD (12) +/** Pass to mspack_version() to get the mskwaj_compressor version */ +#define MSPACK_VER_MSKWAJC (13) +/** Pass to mspack_version() to get the msoab_decompressor version */ +#define MSPACK_VER_MSOABD (14) +/** Pass to mspack_version() to get the msoab_compressor version */ +#define MSPACK_VER_MSOABC (15) + /* --- file I/O abstraction ------------------------------------------------ */ /** @@ -82,7 +286,7 @@ struct mspack_system { /** * Opens a file for reading, writing, appending or updating. 
* - * @param this a self-referential pointer to the mspack_system + * @param self a self-referential pointer to the mspack_system * structure whose open() method is being called. If * this pointer is required by close(), read(), write(), * seek() or tell(), it should be stored in the result @@ -99,12 +303,13 @@ struct mspack_system { * @return a pointer to a mspack_file structure. This structure officially * contains no members, its true contents are up to the * mspack_system implementor. It should contain whatever is needed - * for other mspack_system methods to operate. + * for other mspack_system methods to operate. Returning the NULL + * pointer indicates an error condition. * @see close(), read(), write(), seek(), tell(), message() */ - struct mspack_file * (*open)(struct mspack_system *sys, - char *filename, - int mode); + struct mspack_file * (*open)(struct mspack_system *self, + const char *filename, + int mode); /** * Closes a previously opened file. If any memory was allocated for this @@ -123,12 +328,14 @@ struct mspack_system { * @param bytes the number of bytes to read from the file. * @return the number of bytes successfully read (this can be less than * the number requested), zero to mark the end of file, or less - * than zero to indicate an error. + * than zero to indicate an error. The library does not "retry" + * reads and assumes short reads are due to EOF, so you should + * avoid returning short reads because of transient errors. * @see open(), write() */ int (*read)(struct mspack_file *file, - void *buffer, - int bytes); + void *buffer, + int bytes); /** * Writes a given number of bytes to an open file. @@ -144,8 +351,8 @@ struct mspack_system { * @see open(), read() */ int (*write)(struct mspack_file *file, - void *buffer, - int bytes); + void *buffer, + int bytes); /** * Seeks to a specific file offset within an open file. 
@@ -171,8 +378,8 @@ struct mspack_system { * @see open(), tell() */ int (*seek)(struct mspack_file *file, - off_t offset, - int mode); + off_t offset, + int mode); /** * Returns the current file position (in bytes) of the given file. @@ -198,26 +405,26 @@ struct mspack_system { * @see open() */ void (*message)(struct mspack_file *file, - char *format, - ...); + const char *format, + ...); /** * Allocates memory. * - * @param sys a self-referential pointer to the mspack_system + * @param self a self-referential pointer to the mspack_system * structure whose alloc() method is being called. * @param bytes the number of bytes to allocate * @result a pointer to the requested number of bytes, or NULL if * not enough memory is available * @see free() */ - void * (*alloc)(struct mspack_system *sys, - size_t bytes); + void * (*alloc)(struct mspack_system *self, + size_t bytes); /** * Frees memory. * - * @param ptr the memory to be freed. + * @param ptr the memory to be freed. NULL is accepted and ignored. * @see alloc() */ void (*free)(void *ptr); @@ -235,8 +442,8 @@ struct mspack_system { * @param bytes the size of the memory region, in bytes */ void (*copy)(void *src, - void *dest, - size_t bytes); + void *dest, + size_t bytes); /** * A null pointer to mark the end of mspack_system. It must equal NULL. @@ -299,8 +506,1857 @@ struct mspack_file { /** Error code: error during decompression */ #define MSPACK_ERR_DECRUNCH (11) -#ifdef __cplusplus +/* --- functions available in library -------------------------------------- */ + +/** Creates a new CAB compressor. + * @param sys a custom mspack_system structure, or NULL to use the default + * @return a #mscab_compressor or NULL + */ +extern struct mscab_compressor * + mspack_create_cab_compressor(struct mspack_system *sys); + +/** Creates a new CAB decompressor. 
+ * @param sys a custom mspack_system structure, or NULL to use the default + * @return a #mscab_decompressor or NULL + */ +extern struct mscab_decompressor * + mspack_create_cab_decompressor(struct mspack_system *sys); + +/** Destroys an existing CAB compressor. + * @param self the #mscab_compressor to destroy + */ +extern void mspack_destroy_cab_compressor(struct mscab_compressor *self); + +/** Destroys an existing CAB decompressor. + * @param self the #mscab_decompressor to destroy + */ +extern void mspack_destroy_cab_decompressor(struct mscab_decompressor *self); + + +/** Creates a new CHM compressor. + * @param sys a custom mspack_system structure, or NULL to use the default + * @return a #mschm_compressor or NULL + */ +extern struct mschm_compressor * + mspack_create_chm_compressor(struct mspack_system *sys); + +/** Creates a new CHM decompressor. + * @param sys a custom mspack_system structure, or NULL to use the default + * @return a #mschm_decompressor or NULL + */ +extern struct mschm_decompressor * + mspack_create_chm_decompressor(struct mspack_system *sys); + +/** Destroys an existing CHM compressor. + * @param self the #mschm_compressor to destroy + */ +extern void mspack_destroy_chm_compressor(struct mschm_compressor *self); + +/** Destroys an existing CHM decompressor. + * @param self the #mschm_decompressor to destroy + */ +extern void mspack_destroy_chm_decompressor(struct mschm_decompressor *self); + + +/** Creates a new LIT compressor. + * @param sys a custom mspack_system structure, or NULL to use the default + * @return a #mslit_compressor or NULL + */ +extern struct mslit_compressor * + mspack_create_lit_compressor(struct mspack_system *sys); + +/** Creates a new LIT decompressor. + * @param sys a custom mspack_system structure, or NULL to use the default + * @return a #mslit_decompressor or NULL + */ +extern struct mslit_decompressor * + mspack_create_lit_decompressor(struct mspack_system *sys); + +/** Destroys an existing LIT compressor. 
+ * @param self the #mslit_compressor to destroy + */ +extern void mspack_destroy_lit_compressor(struct mslit_compressor *self); + +/** Destroys an existing LIT decompressor. + * @param self the #mslit_decompressor to destroy + */ +extern void mspack_destroy_lit_decompressor(struct mslit_decompressor *self); + + +/** Creates a new HLP compressor. + * @param sys a custom mspack_system structure, or NULL to use the default + * @return a #mshlp_compressor or NULL + */ +extern struct mshlp_compressor * + mspack_create_hlp_compressor(struct mspack_system *sys); + +/** Creates a new HLP decompressor. + * @param sys a custom mspack_system structure, or NULL to use the default + * @return a #mshlp_decompressor or NULL + */ +extern struct mshlp_decompressor * + mspack_create_hlp_decompressor(struct mspack_system *sys); + +/** Destroys an existing HLP compressor. + * @param self the #mshlp_compressor to destroy + */ +extern void mspack_destroy_hlp_compressor(struct mshlp_compressor *self); + +/** Destroys an existing HLP decompressor. + * @param self the #mshlp_decompressor to destroy + */ +extern void mspack_destroy_hlp_decompressor(struct mshlp_decompressor *self); + + +/** Creates a new SZDD compressor. + * @param sys a custom mspack_system structure, or NULL to use the default + * @return a #msszdd_compressor or NULL + */ +extern struct msszdd_compressor * + mspack_create_szdd_compressor(struct mspack_system *sys); + +/** Creates a new SZDD decompressor. + * @param sys a custom mspack_system structure, or NULL to use the default + * @return a #msszdd_decompressor or NULL + */ +extern struct msszdd_decompressor * + mspack_create_szdd_decompressor(struct mspack_system *sys); + +/** Destroys an existing SZDD compressor. + * @param self the #msszdd_compressor to destroy + */ +extern void mspack_destroy_szdd_compressor(struct msszdd_compressor *self); + +/** Destroys an existing SZDD decompressor.
+ * @param self the #msszdd_decompressor to destroy + */ +extern void mspack_destroy_szdd_decompressor(struct msszdd_decompressor *self); + + +/** Creates a new KWAJ compressor. + * @param sys a custom mspack_system structure, or NULL to use the default + * @return a #mskwaj_compressor or NULL + */ +extern struct mskwaj_compressor * + mspack_create_kwaj_compressor(struct mspack_system *sys); + +/** Creates a new KWAJ decompressor. + * @param sys a custom mspack_system structure, or NULL to use the default + * @return a #mskwaj_decompressor or NULL + */ +extern struct mskwaj_decompressor * + mspack_create_kwaj_decompressor(struct mspack_system *sys); + +/** Destroys an existing KWAJ compressor. + * @param self the #mskwaj_compressor to destroy + */ +extern void mspack_destroy_kwaj_compressor(struct mskwaj_compressor *self); + +/** Destroys an existing KWAJ decompressor. + * @param self the #mskwaj_decompressor to destroy + */ +extern void mspack_destroy_kwaj_decompressor(struct mskwaj_decompressor *self); + + +/** Creates a new OAB compressor. + * @param sys a custom mspack_system structure, or NULL to use the default + * @return a #msoab_compressor or NULL + */ +extern struct msoab_compressor * + mspack_create_oab_compressor(struct mspack_system *sys); + +/** Creates a new OAB decompressor. + * @param sys a custom mspack_system structure, or NULL to use the default + * @return a #msoab_decompressor or NULL + */ +extern struct msoab_decompressor * + mspack_create_oab_decompressor(struct mspack_system *sys); + +/** Destroys an existing OAB compressor. + * @param self the #msoab_compressor to destroy + */ +extern void mspack_destroy_oab_compressor(struct msoab_compressor *self); + +/** Destroys an existing OAB decompressor. 
+ * @param self the #msoab_decompressor to destroy + */ +extern void mspack_destroy_oab_decompressor(struct msoab_decompressor *self); + + +/* --- support for .CAB (MS Cabinet) file format --------------------------- */ + +/** + * A structure which represents a single cabinet file. + * + * All fields are READ ONLY. + * + * If this cabinet is part of a merged cabinet set, the #files and #folders + * fields are common to all cabinets in the set, and will be identical. + * + * @see mscab_decompressor::open(), mscab_decompressor::close(), + * mscab_decompressor::search() + */ +struct mscabd_cabinet { + /** + * The next cabinet in a chained list, if this cabinet was opened with + * mscab_decompressor::search(). May be NULL to mark the end of the + * list. + */ + struct mscabd_cabinet *next; + + /** + * The filename of the cabinet. More correctly, the filename of the + * physical file that the cabinet resides in. This is given by the + * library user and may be in any format. + */ + const char *filename; + + /** The file offset of cabinet within the physical file it resides in. */ + off_t base_offset; + + /** The length of the cabinet file in bytes. */ + unsigned int length; + + /** The previous cabinet in a cabinet set, or NULL. */ + struct mscabd_cabinet *prevcab; + + /** The next cabinet in a cabinet set, or NULL. */ + struct mscabd_cabinet *nextcab; + + /** The filename of the previous cabinet in a cabinet set, or NULL. */ + char *prevname; + + /** The filename of the next cabinet in a cabinet set, or NULL. */ + char *nextname; + + /** The name of the disk containing the previous cabinet in a cabinet + * set, or NULL. + */ + char *previnfo; + + /** The name of the disk containing the next cabinet in a cabinet set, + * or NULL. + */ + char *nextinfo; + + /** A list of all files in the cabinet or cabinet set. */ + struct mscabd_file *files; + + /** A list of all folders in the cabinet or cabinet set. 
*/ + struct mscabd_folder *folders; + + /** + * The set ID of the cabinet. All cabinets in the same set should have + * the same set ID. + */ + unsigned short set_id; + + /** + * The index number of the cabinet within the set. Numbering should + * start from 0 for the first cabinet in the set, and increment by 1 for + * each following cabinet. + */ + unsigned short set_index; + + /** + * The number of bytes reserved in the header area of the cabinet. + * + * If this is non-zero and flags has MSCAB_HDR_RESV set, this data can + * be read by the calling application. It is of the given length, + * located at offset (base_offset + MSCAB_HDR_RESV_OFFSET) in the + * cabinet file. + * + * @see flags + */ + unsigned short header_resv; + + /** + * Header flags. + * + * - MSCAB_HDR_PREVCAB indicates the cabinet is part of a cabinet set, and + * has a predecessor cabinet. + * - MSCAB_HDR_NEXTCAB indicates the cabinet is part of a cabinet set, and + * has a successor cabinet. + * - MSCAB_HDR_RESV indicates the cabinet has reserved header space. + * + * @see prevname, previnfo, nextname, nextinfo, header_resv + */ + int flags; }; + +/** Offset from start of cabinet to the reserved header data (if present). */ +#define MSCAB_HDR_RESV_OFFSET (0x28) + +/** Cabinet header flag: cabinet has a predecessor */ +#define MSCAB_HDR_PREVCAB (0x01) +/** Cabinet header flag: cabinet has a successor */ +#define MSCAB_HDR_NEXTCAB (0x02) +/** Cabinet header flag: cabinet has reserved header space */ +#define MSCAB_HDR_RESV (0x04) + +/** + * A structure which represents a single folder in a cabinet or cabinet set. + * + * All fields are READ ONLY. + * + * A folder is a single compressed stream of data. When uncompressed, it + * holds the data of one or more files. A folder may be split across more + * than one cabinet. + */ +struct mscabd_folder { + /** + * A pointer to the next folder in this cabinet or cabinet set, or NULL + * if this is the final folder. 
+ */ + struct mscabd_folder *next; + + /** + * The compression format used by this folder. + * + * The macro MSCABD_COMP_METHOD() should be used on this field to get + * the algorithm used. The macro MSCABD_COMP_LEVEL() should be used to get + * the "compression level". + * + * @see MSCABD_COMP_METHOD(), MSCABD_COMP_LEVEL() + */ + int comp_type; + + /** + * The total number of data blocks used by this folder. This includes + * data blocks present in other files, if this folder spans more than + * one cabinet. + */ + unsigned int num_blocks; +}; + +/** + * Returns the compression method used by a folder. + * + * @param comp_type a mscabd_folder::comp_type value + * @return one of #MSCAB_COMP_NONE, #MSCAB_COMP_MSZIP, #MSCAB_COMP_QUANTUM + * or #MSCAB_COMP_LZX + */ +#define MSCABD_COMP_METHOD(comp_type) ((comp_type) & 0x0F) +/** + * Returns the compression level used by a folder. + * + * @param comp_type a mscabd_folder::comp_type value + * @return the compression level. This is only defined by LZX and Quantum + * compression + */ +#define MSCABD_COMP_LEVEL(comp_type) (((comp_type) >> 8) & 0x1F) + +/** Compression mode: no compression. */ +#define MSCAB_COMP_NONE (0) +/** Compression mode: MSZIP (deflate) compression. */ +#define MSCAB_COMP_MSZIP (1) +/** Compression mode: Quantum compression */ +#define MSCAB_COMP_QUANTUM (2) +/** Compression mode: LZX compression */ +#define MSCAB_COMP_LZX (3) + +/** + * A structure which represents a single file in a cabinet or cabinet set. + * + * All fields are READ ONLY. + */ +struct mscabd_file { + /** + * The next file in the cabinet or cabinet set, or NULL if this is the + * final file. + */ + struct mscabd_file *next; + + /** + * The filename of the file. + * + * A null terminated string of up to 255 bytes in length, it may be in + * either ISO-8859-1 or UTF8 format, depending on the file attributes. + * + * @see attribs + */ + char *filename; + + /** The uncompressed length of the file, in bytes. 
*/ + unsigned int length; + + /** + * File attributes. + * + * The following attributes are defined: + * - #MSCAB_ATTRIB_RDONLY indicates the file is write protected. + * - #MSCAB_ATTRIB_HIDDEN indicates the file is hidden. + * - #MSCAB_ATTRIB_SYSTEM indicates the file is an operating system file. + * - #MSCAB_ATTRIB_ARCH indicates the file is "archived". + * - #MSCAB_ATTRIB_EXEC indicates the file is an executable program. + * - #MSCAB_ATTRIB_UTF_NAME indicates the filename is in UTF8 format rather + * than ISO-8859-1. + */ + int attribs; + + /** File's last modified time, hour field. */ + char time_h; + /** File's last modified time, minute field. */ + char time_m; + /** File's last modified time, second field. */ + char time_s; + + /** File's last modified date, day field. */ + char date_d; + /** File's last modified date, month field. */ + char date_m; + /** File's last modified date, year field. */ + int date_y; + + /** A pointer to the folder that contains this file. */ + struct mscabd_folder *folder; + + /** The uncompressed offset of this file in its folder. */ + unsigned int offset; +}; + +/** mscabd_file::attribs attribute: file is read-only. */ +#define MSCAB_ATTRIB_RDONLY (0x01) +/** mscabd_file::attribs attribute: file is hidden. */ +#define MSCAB_ATTRIB_HIDDEN (0x02) +/** mscabd_file::attribs attribute: file is an operating system file. */ +#define MSCAB_ATTRIB_SYSTEM (0x04) +/** mscabd_file::attribs attribute: file is "archived". */ +#define MSCAB_ATTRIB_ARCH (0x20) +/** mscabd_file::attribs attribute: file is an executable program. */ +#define MSCAB_ATTRIB_EXEC (0x40) +/** mscabd_file::attribs attribute: filename is UTF8, not ISO-8859-1. */ +#define MSCAB_ATTRIB_UTF_NAME (0x80) + +/** mscab_decompressor::set_param() parameter: search buffer size. */ +#define MSCABD_PARAM_SEARCHBUF (0) +/** mscab_decompressor::set_param() parameter: repair MS-ZIP streams?
*/ +#define MSCABD_PARAM_FIXMSZIP (1) +/** mscab_decompressor::set_param() parameter: size of decompression buffer */ +#define MSCABD_PARAM_DECOMPBUF (2) +/** mscab_decompressor::set_param() parameter: salvage data from bad cabinets? + * If enabled, open() will skip files with bad folder indices or filenames + * rather than reject the whole cabinet, and extract() will limit rather than + * reject files with invalid offsets and lengths, and bad data block checksums + * will be ignored. Available only in CAB decoder version 2 and above. + */ +#define MSCABD_PARAM_SALVAGE (3) + +/** TODO */ +struct mscab_compressor { + int dummy; +}; + +/** + * A decompressor for .CAB (Microsoft Cabinet) files + * + * All fields are READ ONLY. + * + * @see mspack_create_cab_decompressor(), mspack_destroy_cab_decompressor() + */ +struct mscab_decompressor { + /** + * Opens a cabinet file and reads its contents. + * + * If the file opened is a valid cabinet file, all headers will be read + * and a mscabd_cabinet structure will be returned, with a full list of + * folders and files. + * + * In the case of an error occurring, NULL is returned and the error code + * is available from last_error(). + * + * The filename pointer should be considered "in use" until close() is + * called on the cabinet. + * + * @param self a self-referential pointer to the mscab_decompressor + * instance being called + * @param filename the filename of the cabinet file. This is passed + * directly to mspack_system::open(). + * @return a pointer to a mscabd_cabinet structure, or NULL on failure + * @see close(), search(), last_error() + */ + struct mscabd_cabinet * (*open) (struct mscab_decompressor *self, + const char *filename); + + /** + * Closes a previously opened cabinet or cabinet set. + * + * This closes a cabinet, all cabinets associated with it via the + * mscabd_cabinet::next, mscabd_cabinet::prevcab and + * mscabd_cabinet::nextcab pointers, and all folders and files.
All + * memory used by these entities is freed. + * + * The cabinet pointer is now invalid and cannot be used again. All + * mscabd_folder and mscabd_file pointers from that cabinet or cabinet + * set are also now invalid, and cannot be used again. + * + * If the cabinet pointer given was created using search(), it MUST be + * the cabinet pointer returned by search() and not one of the later + * cabinet pointers further along the mscabd_cabinet::next chain. + + * If extra cabinets have been added using append() or prepend(), these + * will all be freed, even if the cabinet pointer given is not the first + * cabinet in the set. Do NOT close() more than one cabinet in the set. + * + * The mscabd_cabinet::filename is not freed by the library, as it is + * not allocated by the library. The caller should free this itself if + * necessary, before it is lost forever. + * + * @param self a self-referential pointer to the mscab_decompressor + * instance being called + * @param cab the cabinet to close + * @see open(), search(), append(), prepend() + */ + void (*close)(struct mscab_decompressor *self, + struct mscabd_cabinet *cab); + + /** + * Searches a regular file for embedded cabinets. + * + * This opens a normal file with the given filename and will search the + * entire file for embedded cabinet files. + * + * If any cabinets are found, the equivalent of open() is called on each + * potential cabinet file at the offset it was found. All successfully + * open()ed cabinets are kept in a list. + * + * The first cabinet found will be returned directly as the result of + * this method. Any further cabinets found will be chained in a list + * using the mscabd_cabinet::next field. + * + * In the case of an error occurring anywhere other than the simulated + * open(), NULL is returned and the error code is available from + * last_error(). + * + * If no error occurs, but no cabinets can be found in the file, NULL is + * returned and last_error() returns MSPACK_ERR_OK.
+ * + * The filename pointer should be considered in use until close() is + * called on the cabinet. + * + * close() should only be called on the result of search(), not on any + * subsequent cabinets in the mscabd_cabinet::next chain. + * + * @param self a self-referential pointer to the mscab_decompressor + * instance being called + * @param filename the filename of the file to search for cabinets. This + * is passed directly to mspack_system::open(). + * @return a pointer to a mscabd_cabinet structure, or NULL + * @see close(), open(), last_error() + */ + struct mscabd_cabinet * (*search) (struct mscab_decompressor *self, + const char *filename); + + /** + * Appends one mscabd_cabinet to another, forming or extending a cabinet + * set. + * + * This will attempt to append one cabinet to another such that + * (cab->nextcab == nextcab) && (nextcab->prevcab == cab) and + * any folders split between the two cabinets are merged. + * + * The cabinets MUST be part of a cabinet set -- a cabinet set is a + * cabinet that spans more than one physical cabinet file on disk -- and + * must be appropriately matched. + * + * It can be determined if a cabinet has further parts to load by + * examining the mscabd_cabinet::flags field: + * + * - if (flags & MSCAB_HDR_PREVCAB) is non-zero, there is a + * predecessor cabinet to open() and prepend(). Its MS-DOS + * case-insensitive filename is mscabd_cabinet::prevname + * - if (flags & MSCAB_HDR_NEXTCAB) is non-zero, there is a + * successor cabinet to open() and append(). Its MS-DOS case-insensitive + * filename is mscabd_cabinet::nextname + * + * If the cabinets do not match, an error code will be returned. Neither + * cabinet has been altered, and both should be closed separately. + * + * Files and folders in a cabinet set are a single entity. All cabinets + * in a set use the same file list, which is updated as cabinets in the + * set are added.
All pointers to mscabd_folder and mscabd_file + * structures in either cabinet must be discarded and re-obtained after + * merging. + * + * @param self a self-referential pointer to the mscab_decompressor + * instance being called + * @param cab the cabinet which will be appended to, + * predecessor of nextcab + * @param nextcab the cabinet which will be appended, + * successor of cab + * @return an error code, or MSPACK_ERR_OK if successful + * @see prepend(), open(), close() + */ + int (*append) (struct mscab_decompressor *self, + struct mscabd_cabinet *cab, + struct mscabd_cabinet *nextcab); + + /** + * Prepends one mscabd_cabinet to another, forming or extending a + * cabinet set. + * + * This will attempt to prepend one cabinet to another, such that + * (cab->prevcab == prevcab) && (prevcab->nextcab == cab). In + * all other respects, it is identical to append(). See append() for the + * full documentation. + * + * @param self a self-referential pointer to the mscab_decompressor + * instance being called + * @param cab the cabinet which will be prepended to, + * successor of prevcab + * @param prevcab the cabinet which will be prepended, + * predecessor of cab + * @return an error code, or MSPACK_ERR_OK if successful + * @see append(), open(), close() + */ + int (*prepend) (struct mscab_decompressor *self, + struct mscabd_cabinet *cab, + struct mscabd_cabinet *prevcab); + + /** + * Extracts a file from a cabinet or cabinet set. + * + * This extracts a compressed file in a cabinet and writes it to the given + * filename. + * + * The MS-DOS filename of the file, mscabd_file::filename, is NOT USED + * by extract(). 
The caller must examine this MS-DOS filename, copy and + * change it as necessary, create directories as necessary, and provide + * the correct filename as a parameter, which will be passed unchanged + * to the decompressor's mspack_system::open() + * + * If the file belongs to a split folder in a multi-part cabinet set, + * and not enough parts of the cabinet set have been loaded and appended + * or prepended, an error will be returned immediately. + * + * @param self a self-referential pointer to the mscab_decompressor + * instance being called + * @param file the file to be decompressed + * @param filename the filename of the file being written to + * @return an error code, or MSPACK_ERR_OK if successful + */ + int (*extract)(struct mscab_decompressor *self, + struct mscabd_file *file, + const char *filename); + + /** + * Sets a CAB decompression engine parameter. + * + * The following parameters are defined: + * - #MSCABD_PARAM_SEARCHBUF: How many bytes should be allocated as a + * buffer when using search()? The minimum value is 4. The default + * value is 32768. + * - #MSCABD_PARAM_FIXMSZIP: If non-zero, extract() will ignore bad + * checksums and recover from decompression errors in MS-ZIP + * compressed folders. The default value is 0 (don't recover). + * - #MSCABD_PARAM_DECOMPBUF: How many bytes should be used as an input + * bit buffer by decompressors? The minimum value is 4. The default + * value is 4096. + * + * @param self a self-referential pointer to the mscab_decompressor + * instance being called + * @param param the parameter to set + * @param value the value to set the parameter to + * @return MSPACK_ERR_OK if all is OK, or MSPACK_ERR_ARGS if there + * is a problem with either parameter or value. + * @see search(), extract() + */ + int (*set_param)(struct mscab_decompressor *self, + int param, + int value); + + /** + * Returns the error code set by the most recently called method. 
+ * + * This is useful for open() and search(), which do not return an error + * code directly. + * + * @param self a self-referential pointer to the mscab_decompressor + * instance being called + * @return the most recent error code + * @see open(), search() + */ + int (*last_error)(struct mscab_decompressor *self); +}; + +/* --- support for .CHM (HTMLHelp) file format ----------------------------- */ + +/** + * A structure which represents a file to be placed in a CHM helpfile. + * + * A contiguous array of these structures should be passed to + * mschm_compressor::generate(). The array list is terminated with an + * entry whose mschmc_file::section field is set to #MSCHMC_ENDLIST; the + * other fields in this entry are ignored. + */ +struct mschmc_file { + /** One of #MSCHMC_ENDLIST, #MSCHMC_UNCOMP or #MSCHMC_MSCOMP. */ + int section; + + /** The filename of the source file that will be added to the CHM. This + * is passed directly to mspack_system::open(). */ + const char *filename; + + /** The full path and filename of the file within the CHM helpfile, a + * UTF-8 encoded null-terminated string. */ + char *chm_filename; + + /** The length of the file, in bytes. This will be adhered to strictly + * and a read error will be issued if this many bytes cannot be read + * from the real file at CHM generation time. */ + off_t length; +}; + +/** + * A structure which represents a section of a CHM helpfile. + * + * All fields are READ ONLY. + * + * Not used directly, but used as a generic base type for + * mschmd_sec_uncompressed and mschmd_sec_mscompressed. + */ +struct mschmd_section { + /** A pointer to the CHM helpfile that contains this section. */ + struct mschmd_header *chm; + + /** + * The section ID. Either 0 for the uncompressed section + * mschmd_sec_uncompressed, or 1 for the LZX compressed section + * mschmd_sec_mscompressed. No other section IDs are known.
+ */ + unsigned int id; +}; + +/** + * A structure which represents the uncompressed section of a CHM helpfile. + * + * All fields are READ ONLY. + */ +struct mschmd_sec_uncompressed { + /** Generic section data. */ + struct mschmd_section base; + + /** The file offset of where this section begins in the CHM helpfile. */ + off_t offset; +}; + +/** + * A structure which represents the LZX compressed section of a CHM helpfile. + * + * All fields are READ ONLY. + */ +struct mschmd_sec_mscompressed { + /** Generic section data. */ + struct mschmd_section base; + + /** A pointer to the meta-file which represents all LZX compressed data. */ + struct mschmd_file *content; + + /** A pointer to the file which contains the LZX control data. */ + struct mschmd_file *control; + + /** A pointer to the file which contains the LZX reset table. */ + struct mschmd_file *rtable; + + /** A pointer to the file which contains the LZX span information. + * Available only in CHM decoder version 2 and above. + */ + struct mschmd_file *spaninfo; +}; + +/** + * A structure which represents a CHM helpfile. + * + * All fields are READ ONLY. + */ +struct mschmd_header { + /** The version of the CHM file format used in this file. */ + unsigned int version; + + /** + * The "timestamp" of the CHM helpfile. + * + * It is the lower 32 bits of a 64-bit value representing the number of + * centiseconds since 1601-01-01 00:00:00 UTC, plus 42. It is not useful + * as a timestamp, but it is useful as a semi-unique ID. + */ + unsigned int timestamp; + + /** + * The default Language and Country ID (LCID) of the user who ran the + * HTMLHelp Compiler. This is not the language of the CHM file itself. + */ + unsigned int language; + + /** + * The filename of the CHM helpfile. This is given by the library user + * and may be in any format. + */ + const char *filename; + + /** The length of the CHM helpfile, in bytes. */ + off_t length; + + /** A list of all non-system files in the CHM helpfile. 
*/ + struct mschmd_file *files; + + /** + * A list of all system files in the CHM helpfile. + * + * System files are files which begin with "::". They are meta-files + * generated by the CHM creation process. + */ + struct mschmd_file *sysfiles; + + /** The section 0 (uncompressed) data in this CHM helpfile. */ + struct mschmd_sec_uncompressed sec0; + + /** The section 1 (MSCompressed) data in this CHM helpfile. */ + struct mschmd_sec_mscompressed sec1; + + /** The file offset of the first PMGL/PMGI directory chunk. */ + off_t dir_offset; + + /** The number of PMGL/PMGI directory chunks in this CHM helpfile. */ + unsigned int num_chunks; + + /** The size of each PMGL/PMGI chunk, in bytes. */ + unsigned int chunk_size; + + /** The "density" of the quick-reference section in PMGL/PMGI chunks. */ + unsigned int density; + + /** The depth of the index tree. + * + * - if 1, there are no PMGI chunks, only PMGL chunks. + * - if 2, there is 1 PMGI chunk. All chunk indices point to PMGL chunks. + * - if 3, the root PMGI chunk points to secondary PMGI chunks, which in + * turn point to PMGL chunks. + * - and so on... + */ + unsigned int depth; + + /** + * The number of the root PMGI chunk. + * + * If there is no index in the CHM helpfile, this will be 0xFFFFFFFF. + */ + unsigned int index_root; + + /** + * The number of the first PMGL chunk. Usually zero. + * Available only in CHM decoder version 2 and above. + */ + unsigned int first_pmgl; + + /** + * The number of the last PMGL chunk. Usually num_chunks-1. + * Available only in CHM decoder version 2 and above. + */ + unsigned int last_pmgl; + + /** + * A cache of loaded chunks, filled in by mschm_decompressor::fast_find(). + * Available only in CHM decoder version 2 and above. + */ + unsigned char **chunk_cache; +}; + +/** + * A structure which represents a file stored in a CHM helpfile. + * + * All fields are READ ONLY.
+ */ +struct mschmd_file { + /** + * A pointer to the next file in the list, or NULL if this is the final + * file. + */ + struct mschmd_file *next; + + /** + * A pointer to the section that this file is located in. Indirectly, + * it also points to the CHM helpfile the file is located in. + */ + struct mschmd_section *section; + + /** The offset within the section data that this file is located at. */ + off_t offset; + + /** The length of this file, in bytes */ + off_t length; + + /** The filename of this file -- a null terminated string in UTF-8. */ + char *filename; +}; + +/** mschmc_file::section value: end of CHM file list */ +#define MSCHMC_ENDLIST (0) +/** mschmc_file::section value: this file is in the Uncompressed section */ +#define MSCHMC_UNCOMP (1) +/** mschmc_file::section value: this file is in the MSCompressed section */ +#define MSCHMC_MSCOMP (2) + +/** mschm_compressor::set_param() parameter: "timestamp" header */ +#define MSCHMC_PARAM_TIMESTAMP (0) +/** mschm_compressor::set_param() parameter: "language" header */ +#define MSCHMC_PARAM_LANGUAGE (1) +/** mschm_compressor::set_param() parameter: LZX window size */ +#define MSCHMC_PARAM_LZXWINDOW (2) +/** mschm_compressor::set_param() parameter: intra-chunk quickref density */ +#define MSCHMC_PARAM_DENSITY (3) +/** mschm_compressor::set_param() parameter: whether to create indices */ +#define MSCHMC_PARAM_INDEX (4) + +/** + * A compressor for .CHM (Microsoft HTMLHelp) files. + * + * All fields are READ ONLY. + * + * @see mspack_create_chm_compressor(), mspack_destroy_chm_compressor() + */ +struct mschm_compressor { + /** + * Generates a CHM help file. + * + * The help file will contain up to two sections, an Uncompressed + * section and potentially an MSCompressed (LZX compressed) + * section. + * + * While the contents listing of a CHM file is always in lexical order, + * the file list passed in will be taken as the correct order for files + * within the sections. 
It is in your interest to place similar files + * together for better compression. + * + * There are two modes of generation, to use a temporary file or not to + * use one. See use_temporary_file() for the behaviour of generate() in + * these two different modes. + * + * @param self a self-referential pointer to the mschm_compressor + * instance being called + * @param file_list an array of mschmc_file structures, terminated + * with an entry whose mschmc_file::section field is + * #MSCHMC_ENDLIST. The order of the list is + * preserved within each section. The length of any + * mschmc_file::chm_filename string cannot exceed + * roughly 4096 bytes. Each source file must be able + * to supply as many bytes as given in the + * mschmc_file::length field. + * @param output_file the file to write the generated CHM helpfile to. + * This is passed directly to mspack_system::open() + * @return an error code, or MSPACK_ERR_OK if successful + * @see use_temporary_file() set_param() + */ + int (*generate)(struct mschm_compressor *self, + struct mschmc_file file_list[], + const char *output_file); + + /** + * Specifies whether a temporary file is used during CHM generation. + * + * The CHM file format includes data about the compressed section (such + * as its overall size) that is stored in the output CHM file prior to + * the compressed section itself. This unavoidably requires that the + * compressed section has to be generated, before these details can be + * set. There are several ways this can be handled. Firstly, the + * compressed section could be generated entirely in memory before + * writing any of the output CHM file. This approach is not used in + * libmspack, as the compressed section can exceed the addressable + * memory space on most architectures. 
+ * + * libmspack has two options, either to write these unknowable sections + * with blank data, generate the compressed section, then re-open the + * output file for update once the compressed section has been + * completed, or to write the compressed section to a temporary file, + * then write the entire output file at once, performing a simple + * file-to-file copy for the compressed section. + * + * The simple solution of buffering the entire compressed section in + * memory can still be used, if desired. As the temporary file's + * filename is passed directly to mspack_system::open(), it is possible + * for a custom mspack_system implementation to hold this file in memory, + * without writing to a disk. + * + * If a temporary file is set, generate() performs the following + * sequence of events: the temporary file is opened for writing, the + * compression algorithm writes to the temporary file, the temporary + * file is closed. Then the output file is opened for writing and the + * temporary file is re-opened for reading. The output file is written + * and the temporary file is read from. Both files are then closed. The + * temporary file itself is not deleted. If that is desired, the + * temporary file should be deleted after the completion of generate(), + * if it exists. + * + * If a temporary file is set not to be used, generate() performs the + * following sequence of events: the output file is opened for writing, + * then it is written and closed. The output file is then re-opened for + * update, the appropriate sections are seek()ed to and re-written, then + * the output file is closed. + * + * @param self a self-referential pointer to the + * mschm_compressor instance being called + * @param use_temp_file non-zero if the temporary file should be used, + * zero if the temporary file should not be used. + * @param temp_file a file to temporarily write compressed data to, + * before opening it for reading and copying the + * contents to the output file. 
This is passed + * directly to mspack_system::open(). + * @return an error code, or MSPACK_ERR_OK if successful + * @see generate() + */ + int (*use_temporary_file)(struct mschm_compressor *self, + int use_temp_file, + const char *temp_file); + /** + * Sets a CHM compression engine parameter. + * + * The following parameters are defined: + + * - #MSCHMC_PARAM_TIMESTAMP: Sets the "timestamp" of the CHM file + * generated. This is not a timestamp, see mschmd_header::timestamp + * for a description. If this timestamp is 0, generate() will use its + * own algorithm for making a unique ID, based on the lengths and + * names of files in the CHM itself. Defaults to 0, any value between + * 0 and (2^32)-1 is valid. + * - #MSCHMC_PARAM_LANGUAGE: Sets the "language" of the CHM file + * generated. This is not the language used in the CHM file, but the + * language setting of the user who ran the HTMLHelp compiler. It + * defaults to 0x0409. The valid range is between 0x0000 and 0x7F7F. + * - #MSCHMC_PARAM_LZXWINDOW: Sets the size of the LZX history window, + * which is also the interval at which the compressed data stream can be + * randomly accessed. The value is not a size in bytes, but a power of + * two. The default value is 16 (which makes the window 2^16 bytes, or + * 64 kilobytes), the valid range is from 15 (32 kilobytes) to 21 (2 + * megabytes). + * - #MSCHMC_PARAM_DENSITY: Sets the "density" of quick reference + * entries stored at the end of directory listing chunk. Each chunk is + * 4096 bytes in size, and contains as many file entries as there is + * room for. At the other end of the chunk, a list of "quick reference" + * pointers is included. The offset of every 'N'th file entry is given a + * quick reference, where N = (2^density) + 1. The default density is + * 2. The smallest density is 0 (N=2), the maximum is 10 (N=1025). 
As + * each file entry requires at least 5 bytes, the maximum number of + * entries in a single chunk is roughly 800, so the maximum value 10 + * can be used to indicate there are no quickrefs at all. + * - #MSCHMC_PARAM_INDEX: Sets whether or not to include quick lookup + * index chunk(s), in addition to normal directory listing chunks. A + * value of zero means no index chunks will be created, a non-zero value + * means index chunks will be created. The default is zero, "don't + * create an index". + * + * @param self a self-referential pointer to the mschm_compressor + * instance being called + * @param param the parameter to set + * @param value the value to set the parameter to + * @return MSPACK_ERR_OK if all is OK, or MSPACK_ERR_ARGS if there + * is a problem with either parameter or value. + * @see generate() + */
+  int (*set_param)(struct mschm_compressor *self,
+                   int param,
+                   unsigned int value);
+
+  /**
+   * Returns the error code set by the most recently called method.
+   *
+   * @param  self     a self-referential pointer to the mschm_compressor
+   *                  instance being called
+   * @return the most recent error code
+   * @see set_param(), generate()
+   */
+  int (*last_error)(struct mschm_compressor *self);
+};
+
+/**
+ * A decompressor for .CHM (Microsoft HTMLHelp) files.
+ *
+ * All fields are READ ONLY.
+ *
+ * @see mspack_create_chm_decompressor(), mspack_destroy_chm_decompressor()
+ */
+struct mschm_decompressor {
+  /**
+   * Opens a CHM helpfile and reads its contents.
+   *
+   * If the file opened is a valid CHM helpfile, all headers will be read
+   * and a mschmd_header structure will be returned, with a full list of
+   * files.
+   *
+   * In the case of an error occurring, NULL is returned and the error code
+   * is available from last_error().
+   *
+   * The filename pointer should be considered "in use" until close() is
+   * called on the CHM helpfile.
+   *
+   * @param  self     a self-referential pointer to the mschm_decompressor
+   *                  instance being called
+   * @param  filename the filename of the CHM helpfile. This is passed
+   *                  directly to mspack_system::open().
+   * @return a pointer to a mschmd_header structure, or NULL on failure
+   * @see close()
+   */
+  struct mschmd_header *(*open)(struct mschm_decompressor *self,
+                                const char *filename);
+
+  /**
+   * Closes a previously opened CHM helpfile.
+   *
+   * This closes a CHM helpfile, frees the mschmd_header and all
+   * mschmd_file structures associated with it (if any). This works on
+   * both helpfiles opened with open() and helpfiles opened with
+   * fast_open().
+   *
+   * The CHM header pointer is now invalid and cannot be used again. All
+   * mschmd_file pointers referencing that CHM are also now invalid, and
+   * cannot be used again.
+   *
+   * @param  self     a self-referential pointer to the mschm_decompressor
+   *                  instance being called
+   * @param  chm      the CHM helpfile to close
+   * @see open(), fast_open()
+   */
+  void (*close)(struct mschm_decompressor *self,
+                struct mschmd_header *chm);
+
+  /**
+   * Extracts a file from a CHM helpfile.
+   *
+   * This extracts a file from a CHM helpfile and writes it to the given
+   * filename. The filename of the file, mschmd_file::filename, is not
+   * used by extract(), but can be used by the caller as a guide for
+   * constructing an appropriate filename.
+   *
+   * This method works both with files found in the mschmd_header::files
+   * and mschmd_header::sysfiles lists and with mschmd_file structures
+   * generated on the fly by fast_find().
+   *
+   * @param  self     a self-referential pointer to the mschm_decompressor
+   *                  instance being called
+   * @param  file     the file to be decompressed
+   * @param  filename the filename of the file being written to
+   * @return an error code, or MSPACK_ERR_OK if successful
+   */
+  int (*extract)(struct mschm_decompressor *self,
+                 struct mschmd_file *file,
+                 const char *filename);
+
+  /**
+   * Returns the error code set by the most recently called method.
+   *
+   * This is useful for open() and fast_open(), which do not return an
+   * error code directly.
+   *
+   * @param  self     a self-referential pointer to the mschm_decompressor
+   *                  instance being called
+   * @return the most recent error code
+   * @see open(), fast_open(), extract()
+   */
+  int (*last_error)(struct mschm_decompressor *self);
+
+  /**
+   * Opens a CHM helpfile quickly.
+   *
+   * If the file opened is a valid CHM helpfile, only essential headers
+   * will be read. A mschmd_header structure will still be returned, as
+   * with open(), but the mschmd_header::files field will be NULL. No
+   * file details will be automatically read. The fast_find() method
+   * must be used to obtain file details.
+   *
+   * In the case of an error occurring, NULL is returned and the error code
+   * is available from last_error().
+   *
+   * The filename pointer should be considered "in use" until close() is
+   * called on the CHM helpfile.
+   *
+   * @param  self     a self-referential pointer to the mschm_decompressor
+   *                  instance being called
+   * @param  filename the filename of the CHM helpfile. This is passed
+   *                  directly to mspack_system::open().
+   * @return a pointer to a mschmd_header structure, or NULL on failure
+   * @see open(), close(), fast_find(), extract()
+   */
+  struct mschmd_header *(*fast_open)(struct mschm_decompressor *self,
+                                     const char *filename);
+
+  /**
+   * Finds file details quickly.
+   *
+   * Instead of reading all CHM helpfile headers and building a list of
+   * files, fast_open() and fast_find() are intended for finding file
+   * details only when they are needed. The CHM file format includes an
+   * on-disk file index to allow this.
+   *
+   * Given a case-sensitive filename, fast_find() will search the on-disk
+   * index for that file.
+   *
+   * If the file was found, the caller-provided mschmd_file structure will
+   * be filled out like so:
+   * - section: the correct value for the found file
+   * - offset: the correct value for the found file
+   * - length: the correct value for the found file
+   * - all other structure elements: NULL or 0
+   *
+   * If the file was not found, MSPACK_ERR_OK will still be returned as the
+   * result, but the caller-provided structure will be filled out like so:
+   * - section: NULL
+   * - offset: 0
+   * - length: 0
+   * - all other structure elements: NULL or 0
+   *
+   * This method is intended to be used in conjunction with CHM helpfiles
+   * opened with fast_open(), but it also works with helpfiles opened
+   * using the regular open().
+   *
+   * @param  self     a self-referential pointer to the mschm_decompressor
+   *                  instance being called
+   * @param  chm      the CHM helpfile to search for the file
+   * @param  filename the filename of the file to search for
+   * @param  f_ptr    a pointer to a caller-provided mschmd_file structure
+   * @param  f_size   sizeof(struct mschmd_file)
+   * @return an error code, or MSPACK_ERR_OK if successful
+   * @see open(), close(), fast_open(), extract()
+   */
+  int (*fast_find)(struct mschm_decompressor *self,
+                   struct mschmd_header *chm,
+                   const char *filename,
+                   struct mschmd_file *f_ptr,
+                   int f_size);
+};
+
+/* --- support for .LIT (EBook) file format -------------------------------- */
+
+/** TODO */
+struct mslit_compressor {
+  int dummy;
+};
+
+/** TODO */
+struct mslit_decompressor {
+  int dummy;
+};
+
+
+/* --- support for .HLP (MS Help) file format ------------------------------ */
+
+/** TODO */
+struct mshlp_compressor {
+  int dummy;
+};
+
+/** TODO */
+struct mshlp_decompressor {
+  int dummy;
+};
+
+
+/* --- support for SZDD file format ---------------------------------------- */
+
+/** msszdd_compressor::set_param() parameter: the missing character */
+#define MSSZDDC_PARAM_MISSINGCHAR (0)
+
+/** msszddd_header::format value - a regular SZDD file */
+#define MSSZDD_FMT_NORMAL (0)
+
+/** msszddd_header::format value - a special QBasic SZDD file */
+#define MSSZDD_FMT_QBASIC (1)
+
+/**
+ * A structure which represents an SZDD compressed file.
+ *
+ * All fields are READ ONLY.
+ */
+struct msszddd_header {
+  /** The file format; either #MSSZDD_FMT_NORMAL or #MSSZDD_FMT_QBASIC */
+  int format;
+
+  /** The amount of data in the SZDD file once uncompressed. */
+  off_t length;
+
+ /** + * The last character in the filename, traditionally replaced with an + * underscore to show the file is compressed. The null character is used + * to show that this character has not been stored (e.g. because the + * filename is not known).
Generally, only characters that may appear in + * an MS-DOS filename (except ".") are valid. + */ + char missing_char; +}; + +/** + * A compressor for the SZDD file format. + * + * All fields are READ ONLY. + * + * @see mspack_create_szdd_compressor(), mspack_destroy_szdd_compressor() + */ +struct msszdd_compressor { + /** + * Reads an input file and creates a compressed output file in the + * SZDD compressed file format. The SZDD compression format is quick + * but gives poor compression. It is possible for the compressed output + * file to be larger than the input file. + * + * Conventionally, SZDD compressed files have the final character in + * their filename replaced with an underscore, to show they are + * compressed. The missing character is stored in the compressed file + * itself. This is due to the restricted filename conventions of MS-DOS, + * most operating systems, such as UNIX, simply append another file + * extension to the existing filename. As mspack does not deal with + * filenames, this is left up to you. If you wish to set the missing + * character stored in the file header, use set_param() with the + * #MSSZDDC_PARAM_MISSINGCHAR parameter. + * + * "Stream" compression (where the length of the input data is not + * known) is not possible. The length of the input data is stored in the + * header of the SZDD file and must therefore be known before any data + * is compressed. Due to technical limitations of the file format, the + * maximum size of uncompressed file that will be accepted is 2147483647 + * bytes. + * + * @param self a self-referential pointer to the msszdd_compressor + * instance being called + * @param input the name of the file to compressed. This is passed + * passed directly to mspack_system::open() + * @param output the name of the file to write compressed data to. + * This is passed directly to mspack_system::open(). 
+ * @param length the length of the uncompressed file, or -1 to indicate + * that this should be determined automatically by using + * mspack_system::seek() on the input file. + * @return an error code, or MSPACK_ERR_OK if successful + * @see set_param() + */ + int (*compress)(struct msszdd_compressor *self, + const char *input, + const char *output, + off_t length); + + /** + * Sets an SZDD compression engine parameter. + * + * The following parameters are defined: + + * - #MSSZDDC_PARAM_CHARACTER: the "missing character", the last character + * in the uncompressed file's filename, which is traditionally replaced + * with an underscore to show the file is compressed. Traditionally, + * this can only be a character that is a valid part of an MS-DOS, + * filename, but libmspack permits any character between 0x00 and 0xFF + * to be stored. 0x00 is the default, and it represents "no character + * stored". + * + * @param self a self-referential pointer to the msszdd_compressor + * instance being called + * @param param the parameter to set + * @param value the value to set the parameter to + * @return MSPACK_ERR_OK if all is OK, or MSPACK_ERR_ARGS if there + * is a problem with either parameter or value. + * @see compress() + */ + int (*set_param)(struct msszdd_compressor *self, + int param, + unsigned int value); + + /** + * Returns the error code set by the most recently called method. + * + * @param self a self-referential pointer to the msszdd_compressor + * instance being called + * @return the most recent error code + * @see compress() + */ + int (*last_error)(struct mschm_decompressor *self); +}; + +/** + * A decompressor for SZDD compressed files. + * + * All fields are READ ONLY. + * + * @see mspack_create_szdd_decompressor(), mspack_destroy_szdd_decompressor() + */ +struct msszdd_decompressor { + /** + * Opens a SZDD file and reads the header. 
+ * + * If the file opened is a valid SZDD file, all headers will be read and + * a msszddd_header structure will be returned. + * + * In the case of an error occuring, NULL is returned and the error code + * is available from last_error(). + * + * The filename pointer should be considered "in use" until close() is + * called on the SZDD file. + * + * @param self a self-referential pointer to the msszdd_decompressor + * instance being called + * @param filename the filename of the SZDD compressed file. This is + * passed directly to mspack_system::open(). + * @return a pointer to a msszddd_header structure, or NULL on failure + * @see close() + */ + struct msszddd_header *(*open)(struct msszdd_decompressor *self, + const char *filename); + + /** + * Closes a previously opened SZDD file. + * + * This closes a SZDD file and frees the msszddd_header associated with + * it. + * + * The SZDD header pointer is now invalid and cannot be used again. + * + * @param self a self-referential pointer to the msszdd_decompressor + * instance being called + * @param szdd the SZDD file to close + * @see open() + */ + void (*close)(struct msszdd_decompressor *self, + struct msszddd_header *szdd); + + /** + * Extracts the compressed data from a SZDD file. + * + * This decompresses the compressed SZDD data stream and writes it to + * an output file. + * + * @param self a self-referential pointer to the msszdd_decompressor + * instance being called + * @param szdd the SZDD file to extract data from + * @param filename the filename to write the decompressed data to. This + * is passed directly to mspack_system::open(). + * @return an error code, or MSPACK_ERR_OK if successful + */ + int (*extract)(struct msszdd_decompressor *self, + struct msszddd_header *szdd, + const char *filename); + + /** + * Decompresses an SZDD file to an output file in one step. 
+ * + * This opens an SZDD file as input, reads the header, then decompresses + * the compressed data immediately to an output file, finally closing + * both the input and output file. It is more convenient to use than + * open() then extract() then close(), if you do not need to know the + * SZDD output size or missing character. + * + * @param self a self-referential pointer to the msszdd_decompressor + * instance being called + * @param input the filename of the input SZDD file. This is passed + * directly to mspack_system::open(). + * @param output the filename to write the decompressed data to. This + * is passed directly to mspack_system::open(). + * @return an error code, or MSPACK_ERR_OK if successful + */ + int (*decompress)(struct msszdd_decompressor *self, + const char *input, + const char *output); + + /** + * Returns the error code set by the most recently called method. + * + * This is useful for open() which does not return an + * error code directly. + * + * @param self a self-referential pointer to the msszdd_decompressor + * instance being called + * @return the most recent error code + * @see open(), extract(), decompress() + */ + int (*last_error)(struct msszdd_decompressor *self); +}; + +/* --- support for KWAJ file format ---------------------------------------- */ + +/** mskwaj_compressor::set_param() parameter: compression type */ +#define MSKWAJC_PARAM_COMP_TYPE (0) + +/** mskwaj_compressor::set_param() parameter: include the length of the + * uncompressed file in the header? + */ +#define MSKWAJC_PARAM_INCLUDE_LENGTH (1) + +/** KWAJ compression type: no compression. */ +#define MSKWAJ_COMP_NONE (0) +/** KWAJ compression type: no compression, 0xFF XOR "encryption". 
*/ +#define MSKWAJ_COMP_XOR (1) +/** KWAJ compression type: LZSS (same method as SZDD) */ +#define MSKWAJ_COMP_SZDD (2) +/** KWAJ compression type: LZ+Huffman compression */ +#define MSKWAJ_COMP_LZH (3) +/** KWAJ compression type: MSZIP */ +#define MSKWAJ_COMP_MSZIP (4) + +/** KWAJ optional header flag: decompressed file length is included */ +#define MSKWAJ_HDR_HASLENGTH (0x01) + +/** KWAJ optional header flag: unknown 2-byte structure is included */ +#define MSKWAJ_HDR_HASUNKNOWN1 (0x02) + +/** KWAJ optional header flag: unknown multi-sized structure is included */ +#define MSKWAJ_HDR_HASUNKNOWN2 (0x04) + +/** KWAJ optional header flag: file name (no extension) is included */ +#define MSKWAJ_HDR_HASFILENAME (0x08) + +/** KWAJ optional header flag: file extension is included */ +#define MSKWAJ_HDR_HASFILEEXT (0x10) + +/** KWAJ optional header flag: extra text is included */ +#define MSKWAJ_HDR_HASEXTRATEXT (0x20) + +/** + * A structure which represents an KWAJ compressed file. + * + * All fields are READ ONLY. + */ +struct mskwajd_header { + /** The compression type; should be one of #MSKWAJ_COMP_NONE, + * #MSKWAJ_COMP_XOR, #MSKWAJ_COMP_SZDD or #MSKWAJ_COMP_LZH + */ + unsigned short comp_type; + + /** The offset in the file where the compressed data stream begins */ + off_t data_offset; + + /** Flags indicating which optional headers were included. */ + int headers; + + /** The amount of uncompressed data in the file, or 0 if not present. */ + off_t length; + + /** output filename, or NULL if not present */ + char *filename; + + /** extra uncompressed data (usually text) in the header. + * This data can contain nulls so use extra_length to get the size. + */ + char *extra; + + /** length of extra uncompressed data in the header */ + unsigned short extra_length; +}; + +/** + * A compressor for the KWAJ file format. + * + * All fields are READ ONLY. 
+ * + * @see mspack_create_kwaj_compressor(), mspack_destroy_kwaj_compressor() + */ +struct mskwaj_compressor { + /** + * Reads an input file and creates a compressed output file in the + * KWAJ compressed file format. The KWAJ compression format is quick + * but gives poor compression. It is possible for the compressed output + * file to be larger than the input file. + * + * @param self a self-referential pointer to the mskwaj_compressor + * instance being called + * @param input the name of the file to compressed. This is passed + * passed directly to mspack_system::open() + * @param output the name of the file to write compressed data to. + * This is passed directly to mspack_system::open(). + * @param length the length of the uncompressed file, or -1 to indicate + * that this should be determined automatically by using + * mspack_system::seek() on the input file. + * @return an error code, or MSPACK_ERR_OK if successful + * @see set_param() + */ + int (*compress)(struct mskwaj_compressor *self, + const char *input, + const char *output, + off_t length); + + /** + * Sets an KWAJ compression engine parameter. + * + * The following parameters are defined: + * + * - #MSKWAJC_PARAM_COMP_TYPE: the compression method to use. Must + * be one of #MSKWAJC_COMP_NONE, #MSKWAJC_COMP_XOR, #MSKWAJ_COMP_SZDD + * or #MSKWAJ_COMP_LZH. The default is #MSKWAJ_COMP_LZH. + * + * - #MSKWAJC_PARAM_INCLUDE_LENGTH: a boolean; should the compressed + * output file should include the uncompressed length of the input + * file in the header? This adds 4 bytes to the size of the output + * file. A value of zero says "no", non-zero says "yes". The default + * is "no". + * + * @param self a self-referential pointer to the mskwaj_compressor + * instance being called + * @param param the parameter to set + * @param value the value to set the parameter to + * @return MSPACK_ERR_OK if all is OK, or MSPACK_ERR_ARGS if there + * is a problem with either parameter or value. 
+ * @see generate() + */ + int (*set_param)(struct mskwaj_compressor *self, + int param, + unsigned int value); + + + /** + * Sets the original filename of the file before compression, + * which will be stored in the header of the output file. + * + * The filename should be a null-terminated string, it must be an + * MS-DOS "8.3" type filename (up to 8 bytes for the filename, then + * optionally a "." and up to 3 bytes for a filename extension). + * + * If NULL is passed as the filename, no filename is included in the + * header. This is the default. + * + * @param self a self-referential pointer to the mskwaj_compressor + * instance being called + * @param filename the original filename to use + * @return MSPACK_ERR_OK if all is OK, or MSPACK_ERR_ARGS if the + * filename is too long + */ + int (*set_filename)(struct mskwaj_compressor *self, + const char *filename); + + /** + * Sets arbitrary data that will be stored in the header of the + * output file, uncompressed. It can be up to roughly 64 kilobytes, + * as the overall size of the header must not exceed 65535 bytes. + * The data can contain null bytes if desired. + * + * If NULL is passed as the data pointer, or zero is passed as the + * length, no extra data is included in the header. This is the + * default. + * + * @param self a self-referential pointer to the mskwaj_compressor + * instance being called + * @param data a pointer to the data to be stored in the header + * @param bytes the length of the data in bytes + * @return MSPACK_ERR_OK if all is OK, or MSPACK_ERR_ARGS extra data + * is too long + */ + int (*set_extra_data)(struct mskwaj_compressor *self, + void *data, + size_t bytes); + + /** + * Returns the error code set by the most recently called method. 
+ * + * @param self a self-referential pointer to the mskwaj_compressor + * instance being called + * @return the most recent error code + * @see compress() + */ + int (*last_error)(struct mschm_decompressor *self); +}; + +/** + * A decompressor for KWAJ compressed files. + * + * All fields are READ ONLY. + * + * @see mspack_create_kwaj_decompressor(), mspack_destroy_kwaj_decompressor() + */ +struct mskwaj_decompressor { + /** + * Opens a KWAJ file and reads the header. + * + * If the file opened is a valid KWAJ file, all headers will be read and + * a mskwajd_header structure will be returned. + * + * In the case of an error occuring, NULL is returned and the error code + * is available from last_error(). + * + * The filename pointer should be considered "in use" until close() is + * called on the KWAJ file. + * + * @param self a self-referential pointer to the mskwaj_decompressor + * instance being called + * @param filename the filename of the KWAJ compressed file. This is + * passed directly to mspack_system::open(). + * @return a pointer to a mskwajd_header structure, or NULL on failure + * @see close() + */ + struct mskwajd_header *(*open)(struct mskwaj_decompressor *self, + const char *filename); + + /** + * Closes a previously opened KWAJ file. + * + * This closes a KWAJ file and frees the mskwajd_header associated + * with it. The KWAJ header pointer is now invalid and cannot be + * used again. + * + * @param self a self-referential pointer to the mskwaj_decompressor + * instance being called + * @param kwaj the KWAJ file to close + * @see open() + */ + void (*close)(struct mskwaj_decompressor *self, + struct mskwajd_header *kwaj); + + /** + * Extracts the compressed data from a KWAJ file. + * + * This decompresses the compressed KWAJ data stream and writes it to + * an output file. 
+ * + * @param self a self-referential pointer to the mskwaj_decompressor + * instance being called + * @param kwaj the KWAJ file to extract data from + * @param filename the filename to write the decompressed data to. This + * is passed directly to mspack_system::open(). + * @return an error code, or MSPACK_ERR_OK if successful + */
+  int (*extract)(struct mskwaj_decompressor *self,
+                 struct mskwajd_header *kwaj,
+                 const char *filename);
+
+  /**
+   * Decompresses a KWAJ file to an output file in one step.
+   *
+   * This opens a KWAJ file as input, reads the header, then decompresses
+   * the compressed data immediately to an output file, finally closing
+   * both the input and output file. It is more convenient to use than
+   * open() then extract() then close(), if you do not need to know the
+   * KWAJ output size or output filename.
+   *
+   * @param  self     a self-referential pointer to the mskwaj_decompressor
+   *                  instance being called
+   * @param  input    the filename of the input KWAJ file. This is passed
+   *                  directly to mspack_system::open().
+   * @param  output   the filename to write the decompressed data to. This
+   *                  is passed directly to mspack_system::open().
+   * @return an error code, or MSPACK_ERR_OK if successful
+   */
+  int (*decompress)(struct mskwaj_decompressor *self,
+                    const char *input,
+                    const char *output);
+
+  /**
+   * Returns the error code set by the most recently called method.
+   *
+   * This is useful for open() which does not return an
+   * error code directly.
+   *
+   * @param  self     a self-referential pointer to the mskwaj_decompressor
+   *                  instance being called
+   * @return the most recent error code
+   * @see open(), extract(), decompress()
+   */
+  int (*last_error)(struct mskwaj_decompressor *self);
+};
+
+/* --- support for .LZX (Offline Address Book) file format ----------------- */
+
+/**
+ * A compressor for the Offline Address Book (OAB) format.
+ *
+ * All fields are READ ONLY.
+ *
+ * @see mspack_create_oab_compressor(), mspack_destroy_oab_compressor()
+ */
+struct msoab_compressor {
+  /**
+   * Compresses a full OAB file.
+   *
+   * The input file will be read and the compressed contents written to the
+   * output file.
+   *
+   * @param  self     a self-referential pointer to the msoab_compressor
+   *                  instance being called
+   * @param  input    the filename of the input file. This is passed
+   *                  directly to mspack_system::open().
+   * @param  output   the filename of the output file. This is passed
+   *                  directly to mspack_system::open().
+   * @return an error code, or MSPACK_ERR_OK if successful
+   */
+  int (*compress) (struct msoab_compressor *self,
+                   const char *input,
+                   const char *output);
+
+  /**
+   * Generates a compressed incremental OAB patch file.
+   *
+   * The two uncompressed files "input" and "base" will be read, and an
+   * incremental patch to generate "input" from "base" will be written to
+   * the output file.
+   *
+   * @param  self     a self-referential pointer to the msoab_compressor
+   *                  instance being called
+   * @param  input    the filename of the input file containing the new
+   *                  version of its contents. This is passed directly
+   *                  to mspack_system::open().
+   * @param  base     the filename of the original base file containing
+   *                  the old version of its contents, against which the
+   *                  incremental patch shall be generated. This is passed
+   *                  directly to mspack_system::open().
+   * @param  output   the filename of the output file. This is passed
+   *                  directly to mspack_system::open().
+   * @return an error code, or MSPACK_ERR_OK if successful
+   */
+  int (*compress_incremental) (struct msoab_compressor *self,
+                               const char *input,
+                               const char *base,
+                               const char *output);
+};
+
+/**
+ * A decompressor for .LZX (Offline Address Book) files
+ *
+ * All fields are READ ONLY.
+ *
+ * @see mspack_create_oab_decompressor(), mspack_destroy_oab_decompressor()
+ */
+struct msoab_decompressor {
+ /** + * Decompresses a full Offline Address Book file.
+ * + * If the input file is a valid compressed Offline Address Book file, + * it will be read and the decompressed contents will be written to + * the output file. + * + * @param self a self-referential pointer to the msoab_decompressor + * instance being called + * @param input the filename of the input file. This is passed + * directly to mspack_system::open(). + * @param output the filename of the output file. This is passed + * directly to mspack_system::open(). + * @return an error code, or MSPACK_ERR_OK if successful + */ + int (*decompress) (struct msoab_decompressor *self, + const char *input, + const char *output); + + /** + * Decompresses an Offline Address Book with an incremental patch file. + * + * This requires both a full UNCOMPRESSED Offline Address Book file to + * act as the "base", and a compressed incremental patch file as input. + * If the input file is valid, it will be decompressed with reference to + * the base file, and the decompressed contents will be written to the + * output file. + * + * There is no way to tell what the right base file is for the given + * incremental patch, but if you get it wrong, this will usually result + * in incorrect data being decompressed, which will then fail a checksum + * test. + * + * @param self a self-referential pointer to the msoab_decompressor + * instance being called + * @param input the filename of the input file. This is passed + * directly to mspack_system::open(). + * @param base the filename of the base file to which the + * incremental patch shall be applied. This is passed + * directly to mspack_system::open(). + * @param output the filename of the output file. This is passed + * directly to mspack_system::open(). 
+ * @return an error code, or MSPACK_ERR_OK if successful + */ + int (*decompress_incremental) (struct msoab_decompressor *self, + const char *input, + const char *base, + const char *output); +}; + +#ifdef __cplusplus +} #endif #endif diff --git a/third_party/mspack/readbits.h b/third_party/mspack/readbits.h new file mode 100644 index 000000000..9b237a369 --- /dev/null +++ b/third_party/mspack/readbits.h @@ -0,0 +1,207 @@ +/* This file is part of libmspack. + * (C) 2003-2010 Stuart Caie. + * + * libmspack is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License (LGPL) version 2.1 + * + * For further details, see the file COPYING.LIB distributed with libmspack + */ + +#ifndef MSPACK_READBITS_H +#define MSPACK_READBITS_H 1 + +/* this header defines macros that read data streams by + * the individual bits + * + * INIT_BITS initialises bitstream state in state structure + * STORE_BITS stores bitstream state in state structure + * RESTORE_BITS restores bitstream state from state structure + * ENSURE_BITS(n) ensure there are at least N bits in the bit buffer + * READ_BITS(var,n) takes N bits from the buffer and puts them in var + * PEEK_BITS(n) extracts without removing N bits from the bit buffer + * REMOVE_BITS(n) removes N bits from the bit buffer + * + * READ_BITS simply calls ENSURE_BITS, PEEK_BITS and REMOVE_BITS, + * which means it's limited to reading the number of bits you can + * ensure at any one time. It also fails if asked to read zero bits. + * If you need to read zero bits, or more bits than can be ensured in + * one go, use READ_MANY_BITS instead. 
+ * + * These macros have variable names baked into them, so to use them + * you have to define some macros: + * - BITS_TYPE: the type name of your state structure + * - BITS_VAR: the variable that points to your state structure + * - define BITS_ORDER_MSB if bits are read from the MSB, or + * define BITS_ORDER_LSB if bits are read from the LSB + * - READ_BYTES: some code that reads more data into the bit buffer, + * it should use READ_IF_NEEDED (calls read_input if the byte buffer + * is empty), then INJECT_BITS(data,n) to put data from the byte + * buffer into the bit buffer. + * + * You also need to define some variables and structure members: + * - unsigned char *i_ptr; // current position in the byte buffer + * - unsigned char *i_end; // end of the byte buffer + * - unsigned int bit_buffer; // the bit buffer itself + * - unsigned int bits_left; // number of bits remaining + * + * If you use read_input() and READ_IF_NEEDED, they also expect these + * structure members: + * - struct mspack_system *sys; // to access sys->read() + * - unsigned int error; // to record/return read errors + * - unsigned char input_end; // to mark reaching the EOF + * - unsigned char *inbuf; // the input byte buffer + * - unsigned int inbuf_size; // the size of the input byte buffer + * + * Your READ_BYTES implementation should read data from *i_ptr and + * put them in the bit buffer. READ_IF_NEEDED will call read_input() + * if i_ptr reaches i_end, and will fill up inbuf and set i_ptr to + * the start of inbuf and i_end to the end of inbuf. + * + * If you're reading in MSB order, the routines work by using the area + * beyond the MSB and the LSB of the bit buffer as a free source of + * zeroes when shifting. This avoids having to mask any bits. So we + * have to know the bit width of the bit buffer variable. We use + * and CHAR_BIT to find the size of the bit buffer in bits. + * + * If you are reading in LSB order, bits need to be masked. 
Normally + * this is done by computing the mask: N bits are masked by the value + * (1< +#endif +#ifndef CHAR_BIT +# define CHAR_BIT (8) +#endif +#define BITBUF_WIDTH (sizeof(bit_buffer) * CHAR_BIT) + +#define INIT_BITS do { \ + BITS_VAR->i_ptr = &BITS_VAR->inbuf[0]; \ + BITS_VAR->i_end = &BITS_VAR->inbuf[0]; \ + BITS_VAR->bit_buffer = 0; \ + BITS_VAR->bits_left = 0; \ + BITS_VAR->input_end = 0; \ +} while (0) + +#define STORE_BITS do { \ + BITS_VAR->i_ptr = i_ptr; \ + BITS_VAR->i_end = i_end; \ + BITS_VAR->bit_buffer = bit_buffer; \ + BITS_VAR->bits_left = bits_left; \ +} while (0) + +#define RESTORE_BITS do { \ + i_ptr = BITS_VAR->i_ptr; \ + i_end = BITS_VAR->i_end; \ + bit_buffer = BITS_VAR->bit_buffer; \ + bits_left = BITS_VAR->bits_left; \ +} while (0) + +#define ENSURE_BITS(nbits) do { \ + while (bits_left < (nbits)) READ_BYTES; \ +} while (0) + +#define READ_BITS(val, nbits) do { \ + ENSURE_BITS(nbits); \ + (val) = PEEK_BITS(nbits); \ + REMOVE_BITS(nbits); \ +} while (0) + +#define READ_MANY_BITS(val, bits) do { \ + unsigned char needed = (bits), bitrun; \ + (val) = 0; \ + while (needed > 0) { \ + if (bits_left <= (BITBUF_WIDTH - 16)) READ_BYTES; \ + bitrun = (bits_left < needed) ? 
bits_left : needed; \ + (val) = ((val) << bitrun) | PEEK_BITS(bitrun); \ + REMOVE_BITS(bitrun); \ + needed -= bitrun; \ + } \ +} while (0) + +#ifdef BITS_ORDER_MSB +# define PEEK_BITS(nbits) (bit_buffer >> (BITBUF_WIDTH - (nbits))) +# define REMOVE_BITS(nbits) ((bit_buffer <<= (nbits)), (bits_left -= (nbits))) +# define INJECT_BITS(bitdata,nbits) ((bit_buffer |= \ + (bitdata) << (BITBUF_WIDTH - (nbits) - bits_left)), (bits_left += (nbits))) +#else /* BITS_ORDER_LSB */ +# define PEEK_BITS(nbits) (bit_buffer & ((1 << (nbits))-1)) +# define REMOVE_BITS(nbits) ((bit_buffer >>= (nbits)), (bits_left -= (nbits))) +# define INJECT_BITS(bitdata,nbits) ((bit_buffer |= \ + (bitdata) << bits_left), (bits_left += (nbits))) +#endif + +#ifdef BITS_LSB_TABLE +/* lsb_bit_mask[n] = (1 << n) - 1 */ +static const unsigned short lsb_bit_mask[17] = { + 0x0000, 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff, + 0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff +}; +# define PEEK_BITS_T(nbits) (bit_buffer & lsb_bit_mask[(nbits)]) +# define READ_BITS_T(val, nbits) do { \ + ENSURE_BITS(nbits); \ + (val) = PEEK_BITS_T(nbits); \ + REMOVE_BITS(nbits); \ +} while (0) +#endif + +#ifndef BITS_NO_READ_INPUT +# define READ_IF_NEEDED do { \ + if (i_ptr >= i_end) { \ + if (read_input(BITS_VAR)) \ + return BITS_VAR->error; \ + i_ptr = BITS_VAR->i_ptr; \ + i_end = BITS_VAR->i_end; \ + } \ +} while (0) + +static int read_input(BITS_TYPE *p) { + int read = p->sys->read(p->input, &p->inbuf[0], (int)p->inbuf_size); + if (read < 0) return p->error = MSPACK_ERR_READ; + + /* we might overrun the input stream by asking for bits we don't use, + * so fake 2 more bytes at the end of input */ + if (read == 0) { + if (p->input_end) { + D(("out of input bytes")) + return p->error = MSPACK_ERR_READ; + } + else { + read = 2; + p->inbuf[0] = p->inbuf[1] = 0; + p->input_end = 1; + } + } + + /* update i_ptr and i_end */ + p->i_ptr = &p->inbuf[0]; + p->i_end = &p->inbuf[read]; + return 
MSPACK_ERR_OK; +} +#endif +#endif diff --git a/third_party/mspack/readhuff.h b/third_party/mspack/readhuff.h new file mode 100644 index 000000000..4d9422578 --- /dev/null +++ b/third_party/mspack/readhuff.h @@ -0,0 +1,172 @@ +/* This file is part of libmspack. + * (C) 2003-2014 Stuart Caie. + * + * libmspack is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License (LGPL) version 2.1 + * + * For further details, see the file COPYING.LIB distributed with libmspack + */ + +#ifndef MSPACK_READHUFF_H +#define MSPACK_READHUFF_H 1 + +/* This implements a fast Huffman tree decoding system. */ + +#if !(defined(BITS_ORDER_MSB) || defined(BITS_ORDER_LSB)) +# error "readhuff.h is used in conjunction with readbits.h, include that first" +#endif +#if !(defined(TABLEBITS) && defined(MAXSYMBOLS)) +# error "define TABLEBITS(tbl) and MAXSYMBOLS(tbl) before using readhuff.h" +#endif +#if !(defined(HUFF_TABLE) && defined(HUFF_LEN)) +# error "define HUFF_TABLE(tbl) and HUFF_LEN(tbl) before using readhuff.h" +#endif +#ifndef HUFF_ERROR +# error "define HUFF_ERROR before using readhuff.h" +#endif +#ifndef HUFF_MAXBITS +# define HUFF_MAXBITS 16 +#endif + +/* Decodes the next huffman symbol from the input bitstream into var. + * Do not use this macro on a table unless build_decode_table() succeeded. 
+ */ +#define READ_HUFFSYM(tbl, var) do { \ + ENSURE_BITS(HUFF_MAXBITS); \ + sym = HUFF_TABLE(tbl, PEEK_BITS(TABLEBITS(tbl))); \ + if (sym >= MAXSYMBOLS(tbl)) HUFF_TRAVERSE(tbl); \ + (var) = sym; \ + i = HUFF_LEN(tbl, sym); \ + REMOVE_BITS(i); \ +} while (0) + +#ifdef BITS_ORDER_LSB +# define HUFF_TRAVERSE(tbl) do { \ + i = TABLEBITS(tbl) - 1; \ + do { \ + if (i++ > HUFF_MAXBITS) HUFF_ERROR; \ + sym = HUFF_TABLE(tbl, \ + (sym << 1) | ((bit_buffer >> i) & 1)); \ + } while (sym >= MAXSYMBOLS(tbl)); \ +} while (0) +#else +#define HUFF_TRAVERSE(tbl) do { \ + i = 1 << (BITBUF_WIDTH - TABLEBITS(tbl)); \ + do { \ + if ((i >>= 1) == 0) HUFF_ERROR; \ + sym = HUFF_TABLE(tbl, \ + (sym << 1) | ((bit_buffer & i) ? 1 : 0)); \ + } while (sym >= MAXSYMBOLS(tbl)); \ +} while (0) +#endif + +/* make_decode_table(nsyms, nbits, length[], table[]) + * + * This function was originally coded by David Tritscher. + * It builds a fast huffman decoding table from + * a canonical huffman code lengths table. + * + * nsyms = total number of symbols in this huffman tree. + * nbits = any symbols with a code length of nbits or less can be decoded + * in one lookup of the table. + * length = A table to get code lengths from [0 to nsyms-1] + * table = The table to fill up with decoded symbols and pointers. 
+ * Should be ((1<> 1; /* don't do 0 length codes */ + + /* fill entries for codes short enough for a direct mapping */ + for (bit_num = 1; bit_num <= nbits; bit_num++) { + for (sym = 0; sym < nsyms; sym++) { + if (length[sym] != bit_num) continue; +#ifdef BITS_ORDER_MSB + leaf = pos; +#else + /* reverse the significant bits */ + fill = length[sym]; reverse = pos >> (nbits - fill); leaf = 0; + do {leaf <<= 1; leaf |= reverse & 1; reverse >>= 1;} while (--fill); +#endif + + if((pos += bit_mask) > table_mask) return 1; /* table overrun */ + + /* fill all possible lookups of this symbol with the symbol itself */ +#ifdef BITS_ORDER_MSB + for (fill = bit_mask; fill-- > 0;) table[leaf++] = sym; +#else + fill = bit_mask; next_symbol = 1 << bit_num; + do { table[leaf] = sym; leaf += next_symbol; } while (--fill); +#endif + } + bit_mask >>= 1; + } + + /* exit with success if table is now complete */ + if (pos == table_mask) return 0; + + /* mark all remaining table entries as unused */ + for (sym = pos; sym < table_mask; sym++) { +#ifdef BITS_ORDER_MSB + table[sym] = 0xFFFF; +#else + reverse = sym; leaf = 0; fill = nbits; + do { leaf <<= 1; leaf |= reverse & 1; reverse >>= 1; } while (--fill); + table[leaf] = 0xFFFF; +#endif + } + + /* next_symbol = base of allocation for long codes */ + next_symbol = ((table_mask >> 1) < nsyms) ? nsyms : (table_mask >> 1); + + /* give ourselves room for codes to grow by up to 16 more bits. 
+ * codes now start at bit nbits+16 and end at (nbits+16-codelength) */ + pos <<= 16; + table_mask <<= 16; + bit_mask = 1 << 15; + + for (bit_num = nbits+1; bit_num <= HUFF_MAXBITS; bit_num++) { + for (sym = 0; sym < nsyms; sym++) { + if (length[sym] != bit_num) continue; + if (pos >= table_mask) return 1; /* table overflow */ + +#ifdef BITS_ORDER_MSB + leaf = pos >> 16; +#else + /* leaf = the first nbits of the code, reversed */ + reverse = pos >> 16; leaf = 0; fill = nbits; + do {leaf <<= 1; leaf |= reverse & 1; reverse >>= 1;} while (--fill); +#endif + for (fill = 0; fill < (bit_num - nbits); fill++) { + /* if this path hasn't been taken yet, 'allocate' two entries */ + if (table[leaf] == 0xFFFF) { + table[(next_symbol << 1) ] = 0xFFFF; + table[(next_symbol << 1) + 1 ] = 0xFFFF; + table[leaf] = next_symbol++; + } + + /* follow the path and select either left or right for next bit */ + leaf = table[leaf] << 1; + if ((pos >> (15-fill)) & 1) leaf++; + } + table[leaf] = sym; + pos += bit_mask; + } + bit_mask >>= 1; + } + + /* full table? */ + return (pos == table_mask) ? 0 : 1; +} +#endif diff --git a/third_party/mspack/system.c b/third_party/mspack/system.c new file mode 100644 index 000000000..16aa8806d --- /dev/null +++ b/third_party/mspack/system.c @@ -0,0 +1,242 @@ +/* This file is part of libmspack. + * (C) 2003-2004 Stuart Caie. 
+ * + * libmspack is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License (LGPL) version 2.1 + * + * For further details, see the file COPYING.LIB distributed with libmspack + */ + +#ifdef HAVE_CONFIG_H +# include +#endif + +#include + +#if !LARGEFILE_SUPPORT +const char *largefile_msg = "library not compiled to support large files."; +#endif + + +int mspack_version(int entity) { + switch (entity) { + /* CHM decoder version 1 -> 2 changes: + * - added mschmd_sec_mscompressed::spaninfo + * - added mschmd_header::first_pmgl + * - added mschmd_header::last_pmgl + * - added mschmd_header::chunk_cache; + */ + case MSPACK_VER_MSCHMD: + /* CAB decoder version 1 -> 2 changes: + * - added MSCABD_PARAM_SALVAGE + */ + case MSPACK_VER_MSCABD: + return 2; + case MSPACK_VER_LIBRARY: + case MSPACK_VER_SYSTEM: + case MSPACK_VER_MSSZDDD: + case MSPACK_VER_MSKWAJD: + case MSPACK_VER_MSOABD: + return 1; + case MSPACK_VER_MSCABC: + case MSPACK_VER_MSCHMC: + case MSPACK_VER_MSLITD: + case MSPACK_VER_MSLITC: + case MSPACK_VER_MSHLPD: + case MSPACK_VER_MSHLPC: + case MSPACK_VER_MSSZDDC: + case MSPACK_VER_MSKWAJC: + case MSPACK_VER_MSOABC: + return 0; + } + return -1; +} + +int mspack_sys_selftest_internal(int offt_size) { + return (sizeof(off_t) == offt_size) ? 
MSPACK_ERR_OK : MSPACK_ERR_SEEK; +} + +/* validates a system structure */ +int mspack_valid_system(struct mspack_system *sys) { + return (sys != NULL) && (sys->open != NULL) && (sys->close != NULL) && + (sys->read != NULL) && (sys->write != NULL) && (sys->seek != NULL) && + (sys->tell != NULL) && (sys->message != NULL) && (sys->alloc != NULL) && + (sys->free != NULL) && (sys->copy != NULL) && (sys->null_ptr == NULL); +} + +/* returns the length of a file opened for reading */ +int mspack_sys_filelen(struct mspack_system *system, + struct mspack_file *file, off_t *length) +{ + off_t current; + + if (!system || !file || !length) return MSPACK_ERR_OPEN; + + /* get current offset */ + current = system->tell(file); + + /* seek to end of file */ + if (system->seek(file, (off_t) 0, MSPACK_SYS_SEEK_END)) { + return MSPACK_ERR_SEEK; + } + + /* get offset of end of file */ + *length = system->tell(file); + + /* seek back to original offset */ + if (system->seek(file, current, MSPACK_SYS_SEEK_START)) { + return MSPACK_ERR_SEEK; + } + + return MSPACK_ERR_OK; +} + + + +/* definition of mspack_default_system -- if the library is compiled with + * MSPACK_NO_DEFAULT_SYSTEM, no default system will be provided. Otherwise, + * an appropriate default system (e.g. 
the standard C library, or some native + * API calls) + */ + +#ifdef MSPACK_NO_DEFAULT_SYSTEM +struct mspack_system *mspack_default_system = NULL; +#else + +/* implementation of mspack_default_system for standard C library */ + +#include +#include +#include +#include + +struct mspack_file_p { + FILE *fh; + const char *name; +}; + +static struct mspack_file *msp_open(struct mspack_system *self, + const char *filename, int mode) +{ + struct mspack_file_p *fh; + const char *fmode; + + switch (mode) { + case MSPACK_SYS_OPEN_READ: fmode = "rb"; break; + case MSPACK_SYS_OPEN_WRITE: fmode = "wb"; break; + case MSPACK_SYS_OPEN_UPDATE: fmode = "r+b"; break; + case MSPACK_SYS_OPEN_APPEND: fmode = "ab"; break; + default: return NULL; + } + + if ((fh = (struct mspack_file_p *) malloc(sizeof(struct mspack_file_p)))) { + fh->name = filename; + if ((fh->fh = fopen(filename, fmode))) return (struct mspack_file *) fh; + free(fh); + } + return NULL; +} + +static void msp_close(struct mspack_file *file) { + struct mspack_file_p *self = (struct mspack_file_p *) file; + if (self) { + fclose(self->fh); + free(self); + } +} + +static int msp_read(struct mspack_file *file, void *buffer, int bytes) { + struct mspack_file_p *self = (struct mspack_file_p *) file; + if (self && buffer && bytes >= 0) { + size_t count = fread(buffer, 1, (size_t) bytes, self->fh); + if (!ferror(self->fh)) return (int) count; + } + return -1; +} + +static int msp_write(struct mspack_file *file, void *buffer, int bytes) { + struct mspack_file_p *self = (struct mspack_file_p *) file; + if (self && buffer && bytes >= 0) { + size_t count = fwrite(buffer, 1, (size_t) bytes, self->fh); + if (!ferror(self->fh)) return (int) count; + } + return -1; +} + +static int msp_seek(struct mspack_file *file, off_t offset, int mode) { + struct mspack_file_p *self = (struct mspack_file_p *) file; + if (self) { + switch (mode) { + case MSPACK_SYS_SEEK_START: mode = SEEK_SET; break; + case MSPACK_SYS_SEEK_CUR: mode = SEEK_CUR; break; 
+ case MSPACK_SYS_SEEK_END: mode = SEEK_END; break; + default: return -1; + } +#if HAVE_FSEEKO + return fseeko(self->fh, offset, mode); +#else + return fseek(self->fh, offset, mode); +#endif + } + return -1; +} + +static off_t msp_tell(struct mspack_file *file) { + struct mspack_file_p *self = (struct mspack_file_p *) file; +#if HAVE_FSEEKO + return (self) ? (off_t) ftello(self->fh) : 0; +#else + return (self) ? (off_t) ftell(self->fh) : 0; +#endif +} + +static void msp_msg(struct mspack_file *file, const char *format, ...) { + va_list ap; + if (file) fprintf(stderr, "%s: ", ((struct mspack_file_p *) file)->name); + va_start(ap, format); + vfprintf(stderr, format, ap); + va_end(ap); + fputc((int) '\n', stderr); + fflush(stderr); +} + +static void *msp_alloc(struct mspack_system *self, size_t bytes) { +#if DEBUG + /* make uninitialised data obvious */ + char *buf = malloc(bytes + 8); + if (buf) memset(buf, 0xDC, bytes); + *((size_t *)buf) = bytes; + return &buf[8]; +#else + return malloc(bytes); +#endif +} + +static void msp_free(void *buffer) { +#if DEBUG + char *buf = buffer; + size_t bytes; + if (buf) { + buf -= 8; + bytes = *((size_t *)buf); + /* make freed data obvious */ + memset(buf, 0xED, bytes); + free(buf); + } +#else + free(buffer); +#endif +} + +static void msp_copy(void *src, void *dest, size_t bytes) { + memcpy(dest, src, bytes); +} + +static struct mspack_system msp_system = { + &msp_open, &msp_close, &msp_read, &msp_write, &msp_seek, + &msp_tell, &msp_msg, &msp_alloc, &msp_free, &msp_copy, NULL +}; + +struct mspack_system *mspack_default_system = &msp_system; + +#endif diff --git a/third_party/mspack/system.h b/third_party/mspack/system.h new file mode 100644 index 000000000..826e89f3e --- /dev/null +++ b/third_party/mspack/system.h @@ -0,0 +1,113 @@ +/* This file is part of libmspack. + * (C) 2003-2018 Stuart Caie. 
+ *
+ * libmspack is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License (LGPL) version 2.1
+ *
+ * For further details, see the file COPYING.LIB distributed with libmspack
+ */
+
+#ifndef MSPACK_SYSTEM_H
+#define MSPACK_SYSTEM_H 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* ensure config.h is read before mspack.h */
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <mspack.h>
+
+/* assume <string.h> exists */
+#include <string.h>
+
+/* fix for problem with GCC 4 and glibc (thanks to Ville Skytta)
+ * http://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=150429
+ */
+#ifdef read
+# undef read
+#endif
+
+/* Old GCCs don't have __func__, but __FUNCTION__:
+ * http://gcc.gnu.org/onlinedocs/gcc/Function-Names.html
+ */
+#if __STDC_VERSION__ < 199901L
+# if __GNUC__ >= 2
+#  define __func__ __FUNCTION__
+# else
+#  define __func__ "<unknown>"
+# endif
+#endif
+
+/* D((fmt, args...)) prints a debug line with file, line and function in
+ * DEBUG builds and expands to nothing otherwise. The expansion already ends
+ * in a semicolon, and callers write D((...)) without one. */
+#if DEBUG
+# include <stdio.h>
+# define D(x) do { printf("%s:%d (%s) ",__FILE__, __LINE__, __func__); \
+                   printf x ; fputc('\n', stdout); fflush(stdout);} while (0);
+#else
+# define D(x)
+#endif
+
+/* CAB supports searching through files over 4GB in size, and the CHM file
+ * format actively uses 64-bit offsets. These can only be fully supported
+ * if the system the code runs on supports large files. If not, the library
+ * will work as normal using only 32-bit arithmetic, but if an offset
+ * greater than 2GB is detected, an error message indicating the library
+ * can't support the file should be printed.
+ */
+#if HAVE_INTTYPES_H
+# include <inttypes.h>
+#else
+# define PRId64 "lld"
+# define PRIu64 "llu"
+# define PRId32 "ld"
+# define PRIu32 "lu"
+#endif
+
+/* FILESIZEBITS, where the platform provides it, comes from <limits.h> */
+#include <limits.h>
+#if ((defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS >= 64) || \
+     (defined(FILESIZEBITS)      && FILESIZEBITS      >= 64) || \
+     defined(_LARGEFILE_SOURCE) || defined(_LARGEFILE64_SOURCE) || \
+     SIZEOF_OFF_T >= 8)
+# define LARGEFILE_SUPPORT 1
+# define LD PRId64
+# define LU PRIu64
+#else
+extern const char *largefile_msg;
+# define LD PRId32
+# define LU PRIu32
+#endif
+
+/* endian-neutral reading of little-endian data */
+#define __egi32(a,n) (  ((((unsigned char *) a)[n+3]) << 24) | \
+                        ((((unsigned char *) a)[n+2]) << 16) | \
+                        ((((unsigned char *) a)[n+1]) <<  8) | \
+                        ((((unsigned char *) a)[n+0])))
+#define EndGetI64(a) ((((unsigned long long int) __egi32(a,4)) << 32) | \
+                      ((unsigned int) __egi32(a,0)))
+#define EndGetI32(a) __egi32(a,0)
+#define EndGetI16(a) ((((a)[1])<<8)|((a)[0]))
+
+/* endian-neutral reading of big-endian data */
+#define EndGetM32(a) (((((unsigned char *) a)[0]) << 24) | \
+                      ((((unsigned char *) a)[1]) << 16) | \
+                      ((((unsigned char *) a)[2]) <<  8) | \
+                      ((((unsigned char *) a)[3])))
+#define EndGetM16(a) ((((a)[0])<<8)|((a)[1]))
+
+extern struct mspack_system *mspack_default_system;
+
+/* returns the length of a file opened for reading */
+extern int mspack_sys_filelen(struct mspack_system *system,
+                              struct mspack_file *file, off_t *length);
+
+/* validates a system structure */
+extern int mspack_valid_system(struct mspack_system *sys);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif