diff --git a/.gitignore b/.gitignore
deleted file mode 100644
index ba0430d..0000000
--- a/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-__pycache__/
\ No newline at end of file
diff --git a/LICENSE.md b/LICENSE.md
index 50cd16d..9cecc1d 100644
--- a/LICENSE.md
+++ b/LICENSE.md
@@ -1,21 +1,674 @@
-MIT License
+ GNU GENERAL PUBLIC LICENSE
+ Version 3, 29 June 2007
-Copyright (c) 2024 John Doty
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
+ Preamble
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
+ The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
+ The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works. By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users. We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors. You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+ To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights. Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received. You must make sure that they, too, receive
+or can get the source code. And you must show them these terms so they
+know their rights.
+
+ Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+ For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software. For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+ Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so. This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software. The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable. Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products. If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+ Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary. To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ TERMS AND CONDITIONS
+
+ 0. Definitions.
+
+ "This License" refers to version 3 of the GNU General Public License.
+
+ "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+ "The Program" refers to any copyrightable work licensed under this
+License. Each licensee is addressed as "you". "Licensees" and
+"recipients" may be individuals or organizations.
+
+ To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy. The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+ A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+ To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy. Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+ To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies. Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+ An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License. If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+ 1. Source Code.
+
+ The "source code" for a work means the preferred form of the work
+for making modifications to it. "Object code" means any non-source
+form of a work.
+
+ A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+ The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form. A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+ The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities. However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work. For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+ The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+ The Corresponding Source for a work in source code form is that
+same work.
+
+ 2. Basic Permissions.
+
+ All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met. This License explicitly affirms your unlimited
+permission to run the unmodified Program. The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work. This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+ You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force. You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright. Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+ Conveying under any other circumstances is permitted solely under
+the conditions stated below. Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+ 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+ No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+ When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+ 4. Conveying Verbatim Copies.
+
+ You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+ You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+ 5. Conveying Modified Source Versions.
+
+ You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+ a) The work must carry prominent notices stating that you modified
+ it, and giving a relevant date.
+
+ b) The work must carry prominent notices stating that it is
+ released under this License and any conditions added under section
+ 7. This requirement modifies the requirement in section 4 to
+ "keep intact all notices".
+
+ c) You must license the entire work, as a whole, under this
+ License to anyone who comes into possession of a copy. This
+ License will therefore apply, along with any applicable section 7
+ additional terms, to the whole of the work, and all its parts,
+ regardless of how they are packaged. This License gives no
+ permission to license the work in any other way, but it does not
+ invalidate such permission if you have separately received it.
+
+ d) If the work has interactive user interfaces, each must display
+ Appropriate Legal Notices; however, if the Program has interactive
+ interfaces that do not display Appropriate Legal Notices, your
+ work need not make them do so.
+
+ A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit. Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+ 6. Conveying Non-Source Forms.
+
+ You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+ a) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by the
+ Corresponding Source fixed on a durable physical medium
+ customarily used for software interchange.
+
+ b) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by a
+ written offer, valid for at least three years and valid for as
+ long as you offer spare parts or customer support for that product
+ model, to give anyone who possesses the object code either (1) a
+ copy of the Corresponding Source for all the software in the
+ product that is covered by this License, on a durable physical
+ medium customarily used for software interchange, for a price no
+ more than your reasonable cost of physically performing this
+ conveying of source, or (2) access to copy the
+ Corresponding Source from a network server at no charge.
+
+ c) Convey individual copies of the object code with a copy of the
+ written offer to provide the Corresponding Source. This
+ alternative is allowed only occasionally and noncommercially, and
+ only if you received the object code with such an offer, in accord
+ with subsection 6b.
+
+ d) Convey the object code by offering access from a designated
+ place (gratis or for a charge), and offer equivalent access to the
+ Corresponding Source in the same way through the same place at no
+ further charge. You need not require recipients to copy the
+ Corresponding Source along with the object code. If the place to
+ copy the object code is a network server, the Corresponding Source
+ may be on a different server (operated by you or a third party)
+ that supports equivalent copying facilities, provided you maintain
+ clear directions next to the object code saying where to find the
+ Corresponding Source. Regardless of what server hosts the
+ Corresponding Source, you remain obligated to ensure that it is
+ available for as long as needed to satisfy these requirements.
+
+ e) Convey the object code using peer-to-peer transmission, provided
+ you inform other peers where the object code and Corresponding
+ Source of the work are being offered to the general public at no
+ charge under subsection 6d.
+
+ A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+ A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling. In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage. For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product. A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+ "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source. The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+ If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information. But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+ The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed. Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+ Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+ 7. Additional Terms.
+
+ "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law. If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+ When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it. (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.) You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+ Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+ a) Disclaiming warranty or limiting liability differently from the
+ terms of sections 15 and 16 of this License; or
+
+ b) Requiring preservation of specified reasonable legal notices or
+ author attributions in that material or in the Appropriate Legal
+ Notices displayed by works containing it; or
+
+ c) Prohibiting misrepresentation of the origin of that material, or
+ requiring that modified versions of such material be marked in
+ reasonable ways as different from the original version; or
+
+ d) Limiting the use for publicity purposes of names of licensors or
+ authors of the material; or
+
+ e) Declining to grant rights under trademark law for use of some
+ trade names, trademarks, or service marks; or
+
+ f) Requiring indemnification of licensors and authors of that
+ material by anyone who conveys the material (or modified versions of
+ it) with contractual assumptions of liability to the recipient, for
+ any liability that these contractual assumptions directly impose on
+ those licensors and authors.
+
+ All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10. If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term. If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+ If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+ Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+ 8. Termination.
+
+ You may not propagate or modify a covered work except as expressly
+provided under this License. Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+ However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+ Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+ Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License. If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+ 9. Acceptance Not Required for Having Copies.
+
+ You are not required to accept this License in order to receive or
+run a copy of the Program. Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance. However,
+nothing other than this License grants you permission to propagate or
+modify any covered work. These actions infringe copyright if you do
+not accept this License. Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+ 10. Automatic Licensing of Downstream Recipients.
+
+ Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License. You are not responsible
+for enforcing compliance by third parties with this License.
+
+ An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations. If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+ You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License. For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+ 11. Patents.
+
+ A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based. The
+work thus licensed is called the contributor's "contributor version".
+
+ A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version. For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+ Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+ In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement). To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+ If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients. "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+ If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+ A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License. You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+ Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+ 12. No Surrender of Others' Freedom.
+
+ If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all. For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+ 13. Use with the GNU Affero General Public License.
+
+ Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work. The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+ 14. Revised Versions of this License.
+
+ The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation. If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+ If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+ Later license versions may give you additional or different
+permissions. However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+ 15. Disclaimer of Warranty.
+
+ THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. Limitation of Liability.
+
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+ 17. Interpretation of Sections 15 and 16.
+
+ If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ {one line to give the program's name and a brief idea of what it does.}
+ Copyright (C) {year} {name of author}
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+ If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+ {project} Copyright (C) {year} {fullname}
+ This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+ You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<http://www.gnu.org/licenses/>.
+
+ The GNU General Public License does not permit incorporating your program
+into proprietary programs. If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License. But first, please read
+<http://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/README.md b/README.md
index b449b8e..80c7dec 100644
--- a/README.md
+++ b/README.md
@@ -1,126 +1,18 @@
# A collection of LR parser generators, from LR0 through LALR.
-This is a small helper library to generate LR parser tables.
+One day I read a tweet, asking for a tool which accepted a grammar and an
+input file and which then produced simple parsed output, without any kind of
+in-between. (There was other ranty stuff about how none of the existing tools
+really worked, but that was beside the point.)
-The primary inspiration for this library is tree-sitter, which also generates
-LR parsers for grammars written in a turing-complete language. Like that, we
-write grammars in a language, only we do it in Python instead of JavaScript.
+Upon reading the tweet, it occurred to me that I didn't know how LR parsers
+worked and how they were generated, except in the broadest of terms. Thus, I
+set about writing this, learning as I went.
-Why Python? Because Python 3 is widely pre-installed on MacOS and Linux. This
-library requires nothing more than the basic standard library, and not even a
-new version of it. Therefore, it turns out to be a pretty light dependency for
-a rust or C++ or something kind of project. (Tree-sitter, on the other hand,
-requires node, which is a far less stable and available runtime in 2024.)
-
-The parser tables can really be used to power anything. I prefer to make
-concrete syntax trees (again, see tree-sitter), and there is no facility at all
-for actions or custom ASTs or whatnot. Any such processing needs to be done by
-the thing that processes the tables.
-
-## Making Grammars
-
-To get started, create a grammar that derives from the `Grammar` class. Create
-one method per nonterminal, decorated with the `rule` decorator. Here's an
-example:
-
- PLUS = Token('+')
- LPAREN = Token('(')
- RPAREN = Token(')')
- ID = Token('id')
-
- class SimpleGrammar(Grammar):
- @rule
- def expression(self):
- return seq(self.expression, PLUS, self.term) | self.term
-
- @rule
- def term(self):
- return seq(LPAREN, self.expression, RPAREN) | ID
-
-
-## Using grammars
-
-TODO
-
-## Representation Choices
-
-The SimpleGrammar class might seem a little verbose compared to a dense
-structure like:
-
- grammar_simple = [
- ('E', ['E', '+', 'T']),
- ('E', ['T']),
- ('T', ['(', 'E', ')']),
- ('T', ['id']),
- ]
-
-or
-
- grammar_simple = {
- 'E': [
- ['E', '+', 'T'],
- ['T'],
- ],
- 'T': [
- ['(', 'E', ')'],
- ['id'],
- ],
- }
-
-
-The advantage that the class has over a table like this is that you get to have
-all of your Python tools help you make sure your grammar is good, if you want
-them. e.g., if you're working with an LSP or something, the members give you
-autocomplete and jump-to-definition and possibly even type-checking.
-
-At the very least, if you mis-type the name of a nonterminal, or forget to
-implement it, we will immediately raise an error that *INCLUDES THE LOCATION IN
-THE SOURCE WHERE THE ERROR WAS MADE.* With tables, we can tell you that you
-made a mistake but it's up to you to figure out where you did it.
-
-### Aside: What about a custom DSL/EBNF like thing?
-
-Yeah, OK, there's a rich history of writing your grammar in a domain-specific
-language. YACC did it, ANTLR does it, GRMTools.... just about everybody except
-Tree-Sitter does this.
-
-But look, I've got several reasons for not doing it.
-
-First, I'm lazy, and don't want to write yet another parser for my parser. What
-tools should I use to write my parser generator parser? I guess I don't have my
-parser generator parser yet, so probably a hand-written top down parser? Some
-other python parser generator? Ugh!
-
-As an add-on to that, if I make my own format then I need to make tooling for
-*that* too: syntax highlighters, jump to definition, the works. Yuck. An
-existing language, and a format that builds on an existing language, gets me the
-tooling that comes along with that language. If you can leverage that
-effictively (and I think I have) then you start way ahead in terms of tooling.
-
-Second, this whole thing is supposed to be easy to include in an existing
-project, and adding a custom compiler doesn't seem to be that. Adding two python
-files seems to be about the right speed.
-
-Thirdly, and this is just hypothetical, it's probably pretty easy to write your
-own tooling around a grammar if it's already in Python. If you want to make
-railroad diagrams or EBNF pictures or whatever, all the productions are already
-right there in data structures for you to process. I've tried to keep them
-accessible and at least somewhat easy to work with. There's nothing that says a
-DSL-based system *has* to produce unusable intermediate data- certainly there
-are some tools that *try*- but with this approach the accessibility and the
-ergonomics of the tool go hand in hand.
-
-## Some History
-
-The first version of this code was written as an idle exercise to learn how LR
-parser table generation even worked. It was... very simple, fairly easy to
-follow, and just *incredibly* slow. Like, mind-bogglingly slow. Unusably slow
-for anything but the most trivial grammar.
-
-As a result, when I decided I wanted to use it for a larger grammar, I found that
-I just couldn't. So this has been hacked and significantly improved from that
-version, now capable of building tables for nontrivial grammars. It could still
-be a lot faster, but it meets my needs for now.
+This code is not written to be fast, or even efficient, although it runs its
+test cases fast enough. It was instead written to be easy to follow along
+with, so that when I forget how all this works I can come back to the code
+and read along and learn all over again.
(BTW, the notes I read to learn how all this works are at
http://dragonbook.stanford.edu/lecture-notes/Stanford-CS143/. Specifically,
@@ -128,5 +20,7 @@ I started with handout 8, 'Bottom-up-parsing', and went from there. (I did
eventually have to backtrack a little into handout 7, since that's where
First() and Follow() are covered.)
+Enjoy!
+
doty
-May 2024
+2016-12-09
diff --git a/grammar.py b/grammar.py
deleted file mode 100644
index c37405f..0000000
--- a/grammar.py
+++ /dev/null
@@ -1,389 +0,0 @@
-# This is an example grammar.
-from parser import Assoc, Grammar, Nothing, Token, rule, seq
-
-ARROW = Token("Arrow")
-AS = Token("As")
-BAR = Token("Bar")
-CLASS = Token("Class")
-COLON = Token("Colon")
-ELSE = Token("Else")
-FOR = Token("For")
-FUN = Token("Fun")
-IDENTIFIER = Token("Identifier")
-IF = Token("If")
-IMPORT = Token("Import")
-IN = Token("In")
-LCURLY = Token("LeftBrace")
-LET = Token("Let")
-RCURLY = Token("RightBrace")
-RETURN = Token("Return")
-SEMICOLON = Token("Semicolon")
-STRING = Token("String")
-WHILE = Token("While")
-EQUAL = Token("Equal")
-LPAREN = Token("LeftParen")
-RPAREN = Token("RightParen")
-COMMA = Token("Comma")
-SELF = Token("Selff")
-OR = Token("Or")
-IS = Token("Is")
-AND = Token("And")
-EQUALEQUAL = Token("EqualEqual")
-BANGEQUAL = Token("BangEqual")
-LESS = Token("Less")
-GREATER = Token("Greater")
-LESSEQUAL = Token("LessEqual")
-GREATEREQUAL = Token("GreaterEqual")
-PLUS = Token("Plus")
-MINUS = Token("Minus")
-STAR = Token("Star")
-SLASH = Token("Slash")
-NUMBER = Token("Number")
-TRUE = Token("True")
-FALSE = Token("False")
-BANG = Token("Bang")
-DOT = Token("Dot")
-MATCH = Token("Match")
-EXPORT = Token("Export")
-UNDERSCORE = Token("Underscore")
-NEW = Token("New")
-LSQUARE = Token("LeftBracket")
-RSQUARE = Token("RightBracket")
-
-
-class FineGrammar(Grammar):
- def __init__(self):
- super().__init__(
- precedence=[
- (Assoc.RIGHT, [EQUAL]),
- (Assoc.LEFT, [OR]),
- (Assoc.LEFT, [IS]),
- (Assoc.LEFT, [AND]),
- (Assoc.LEFT, [EQUALEQUAL, BANGEQUAL]),
- (Assoc.LEFT, [LESS, GREATER, GREATEREQUAL, LESSEQUAL]),
- (Assoc.LEFT, [PLUS, MINUS]),
- (Assoc.LEFT, [STAR, SLASH]),
- (Assoc.LEFT, [self.primary_expression]),
- (Assoc.LEFT, [LPAREN]),
- (Assoc.LEFT, [DOT]),
- #
- # If there's a confusion about whether to make an IF
- # statement or an expression, prefer the statement.
- #
- (Assoc.NONE, [self.if_statement]),
- ]
- )
-
- @rule
- def file(self):
- return self.file_statement_list
-
- @rule
- def file_statement_list(self):
- return self.file_statement | (self.file_statement_list + self.file_statement)
-
- @rule
- def file_statement(self):
- return (
- self.import_statement | self.class_declaration | self.export_statement | self.statement
- )
-
- @rule
- def import_statement(self):
- return seq(IMPORT, STRING, AS, IDENTIFIER, SEMICOLON)
-
- @rule
- def class_declaration(self):
- return seq(CLASS, IDENTIFIER, self.class_body)
-
- @rule
- def class_body(self):
- return seq(LCURLY, RCURLY) | seq(LCURLY, self.class_members, RCURLY)
-
- @rule
- def class_members(self):
- return self.class_member | seq(self.class_members, self.class_member)
-
- @rule
- def class_member(self):
- return self.field_declaration | self.function_declaration
-
- @rule
- def field_declaration(self):
- return seq(IDENTIFIER, COLON, self.type_expression, SEMICOLON)
-
- # Types
- @rule
- def type_expression(self):
- return self.alternate_type | self.type_identifier
-
- @rule
- def alternate_type(self):
- return seq(self.type_expression, BAR, self.type_identifier)
-
- @rule
- def type_identifier(self):
- return IDENTIFIER
-
- @rule
- def export_statement(self):
- return (
- seq(EXPORT, self.class_declaration)
- | seq(EXPORT, self.function_declaration)
- | seq(EXPORT, self.let_statement)
- | seq(EXPORT, self.export_list, SEMICOLON)
- )
-
- @rule
- def export_list(self):
- return Nothing | IDENTIFIER | seq(IDENTIFIER, COMMA, self.export_list)
-
- # Functions
- @rule
- def function_declaration(self):
- return seq(FUN, IDENTIFIER, self.function_parameters, self.block) | seq(
- FUN, IDENTIFIER, self.function_parameters, ARROW, self.type_expression, self.block
- )
-
- @rule
- def function_parameters(self):
- return (
- seq(LPAREN, RPAREN)
- | seq(LPAREN, self.first_parameter, RPAREN)
- | seq(LPAREN, self.first_parameter, COMMA, self.parameter_list, RPAREN)
- )
-
- @rule
- def first_parameter(self):
- return SELF | self.parameter
-
- @rule
- def parameter_list(self):
- return Nothing | self.parameter | seq(self.parameter, COMMA, self.parameter_list)
-
- @rule
- def parameter(self):
- return seq(IDENTIFIER, COLON, self.type_expression)
-
- # Block
- @rule
- def block(self):
- return (
- seq(LCURLY, RCURLY)
- | seq(LCURLY, self.statement_list, RCURLY)
- | seq(LCURLY, self.statement_list, self.expression, RCURLY)
- )
-
- @rule
- def statement_list(self):
- return self.statement | seq(self.statement_list, self.statement)
-
- @rule
- def statement(self):
- return (
- self.function_declaration
- | self.let_statement
- | self.return_statement
- | self.for_statement
- | self.if_statement
- | self.while_statement
- | self.expression_statement
- )
-
- @rule
- def let_statement(self):
- return seq(LET, IDENTIFIER, EQUAL, self.expression, SEMICOLON)
-
- @rule
- def return_statement(self):
- return seq(RETURN, self.expression, SEMICOLON)
-
- @rule
- def for_statement(self):
- return seq(FOR, self.iterator_variable, IN, self.expression, self.block)
-
- @rule
- def iterator_variable(self):
- return IDENTIFIER
-
- @rule
- def if_statement(self):
- return self.conditional_expression
-
- @rule
- def while_statement(self):
- return seq(WHILE, self.expression, self.block)
-
- @rule
- def expression_statement(self):
- return seq(self.expression, SEMICOLON)
-
- # Expressions
- @rule
- def expression(self):
- return self.assignment_expression
-
- @rule
- def assignment_expression(self):
- return seq(self.or_expression, EQUAL, self.assignment_expression) | self.or_expression
-
- @rule
- def or_expression(self):
- return seq(self.or_expression, OR, self.is_expression) | self.is_expression
-
- @rule
- def is_expression(self):
- return seq(self.is_expression, IS, self.pattern) | self.and_expression
-
- @rule
- def and_expression(self):
- return seq(self.and_expression, AND, self.equality_expression) | self.equality_expression
-
- @rule
- def equality_expression(self):
- return (
- seq(self.equality_expression, EQUALEQUAL, self.relation_expression)
- | seq(self.equality_expression, BANGEQUAL, self.relation_expression)
- | self.relation_expression
- )
-
- @rule
- def relation_expression(self):
- return (
- seq(self.relation_expression, LESS, self.additive_expression)
- | seq(self.relation_expression, LESSEQUAL, self.additive_expression)
- | seq(self.relation_expression, GREATER, self.additive_expression)
- | seq(self.relation_expression, GREATEREQUAL, self.additive_expression)
- )
-
- @rule
- def additive_expression(self):
- return (
- seq(self.additive_expression, PLUS, self.multiplication_expression)
- | seq(self.additive_expression, MINUS, self.multiplication_expression)
- | self.multiplication_expression
- )
-
- @rule
- def multiplication_expression(self):
- return (
- seq(self.multiplication_expression, STAR, self.primary_expression)
- | seq(self.multiplication_expression, SLASH, self.primary_expression)
- | self.primary_expression
- )
-
- @rule
- def primary_expression(self):
- return (
- IDENTIFIER
- | SELF
- | NUMBER
- | STRING
- | TRUE
- | FALSE
- | seq(BANG, self.primary_expression)
- | seq(MINUS, self.primary_expression)
- | self.block
- | self.conditional_expression
- | self.list_constructor_expression
- | self.object_constructor_expression
- | self.match_expression
- | seq(self.primary_expression, LPAREN, self.expression_list, RPAREN)
- | seq(self.primary_expression, DOT, IDENTIFIER)
- | seq(LPAREN, self.expression, RPAREN)
- )
-
- @rule
- def conditional_expression(self):
- return (
- seq(IF, self.expression, self.block)
- | seq(IF, self.expression, self.block, ELSE, self.conditional_expression)
- | seq(IF, self.expression, self.block, ELSE, self.block)
- )
-
- @rule
- def list_constructor_expression(self):
- return seq(LSQUARE, RSQUARE) | seq(LSQUARE, self.expression_list, RSQUARE)
-
- @rule
- def expression_list(self):
- return (
- self.expression
- | seq(self.expression, COMMA)
- | seq(self.expression, COMMA, self.expression_list)
- )
-
- @rule
- def match_expression(self):
- return seq(MATCH, self.match_body)
-
- @rule
- def match_body(self):
- return seq(LCURLY, RCURLY) | seq(LCURLY, self.match_arms, RCURLY)
-
- @rule
- def match_arms(self):
- return (
- self.match_arm
- | seq(self.match_arm, COMMA)
- | seq(self.match_arm, COMMA, self.match_arms)
- )
-
- @rule
- def match_arm(self):
- return seq(self.pattern, ARROW, self.expression)
-
- @rule
- def pattern(self):
- return (
- seq(self.variable_binding, self.pattern_core, AND, self.and_expression)
- | seq(self.variable_binding, self.pattern_core)
- | seq(self.pattern_core, AND, self.and_expression)
- | self.pattern_core
- )
-
- @rule
- def pattern_core(self):
- return self.type_expression | self.wildcard_pattern
-
- @rule
- def wildcard_pattern(self):
- return UNDERSCORE
-
- @rule
- def variable_binding(self):
- return seq(IDENTIFIER, COLON)
-
- @rule
- def object_constructor_expression(self):
- return seq(NEW, self.type_identifier, self.field_list)
-
- @rule
- def field_list(self):
- return seq(LCURLY, RCURLY) | seq(LCURLY, self.field_values, RCURLY)
-
- @rule
- def field_values(self):
- return (
- self.field_value
- | seq(self.field_value, COMMA)
- | seq(self.field_value, COMMA, self.field_values)
- )
-
- @rule
- def field_value(self):
- return IDENTIFIER | seq(IDENTIFIER, COLON, self.expression)
-
-
-grammar = FineGrammar()
-table = grammar.build_table(start="file")
-
-print(f"{len(table)} states")
-
-average_entries = sum(len(row) for row in table) / len(table)
-max_entries = max(len(row) for row in table)
-print(f"{average_entries} average, {max_entries} max")
-
-# print(parser_faster.format_table(gen, table))
-# print()
-# tree = parse(table, ["id", "+", "(", "id", "[", "id", "]", ")"])
diff --git a/historical/parser.py b/historical/parser.py
deleted file mode 100644
index 17101bd..0000000
--- a/historical/parser.py
+++ /dev/null
@@ -1,853 +0,0 @@
-"""A collection of LR parser generators, from LR0 through LALR.
-
-One day I read a tweet, asking for a tool which accepted a grammar and an
-input file and which then produced simple parsed output, without any kind of
-in-between. (There was other ranty stuff about how none of the existing tools
-really worked, but that was beside the point.)
-
-Upon reading the tweet, it occured to me that I didn't know how LR parsers
-worked and how they were generated, except in the broadest of terms. Thus, I
-set about writing this, learning as I went.
-
-This code is not written to be fast, or even efficient, although it runs its
-test cases fast enough. It was instead written to be easy to follow along
-with, so that when I forget how all this works I can come back to the code
-and read along and learn all over again.
-
-(BTW, the notes I read to learn how all this works are at
-http://dragonbook.stanford.edu/lecture-notes/Stanford-CS143/. Specifically,
-I started with handout 8, 'Bottom-up-parsing', and went from there. (I did
-eventually have to backtrack a little into handout 7, since that's where
-First() and Follow() are covered.)
-
-Enjoy!
-
-doty
-2016-12-09
-"""
-
-from collections import namedtuple
-
-
-###############################################################################
-# LR0
-#
-# We start with LR0 parsers, because they form the basis of everything else.
-###############################################################################
-class Configuration(namedtuple("Configuration", ["name", "symbols", "position", "lookahead"])):
- """A rule being tracked in a state.
-
- (Note: technically, lookahead isn't used until we get to LR(1) parsers,
- but if left at its default it's harmless. Ignore it until you get to
- the part about LR(1).)
- """
-
- __slots__ = ()
-
- @classmethod
- def from_rule(cls, rule, lookahead=()):
- return Configuration(
- name=rule[0],
- symbols=rule[1],
- position=0,
- lookahead=lookahead,
- )
-
- @property
- def at_end(self):
- return self.position == len(self.symbols)
-
- @property
- def next(self):
- return self.symbols[self.position] if not self.at_end else None
-
- @property
- def rest(self):
- return self.symbols[(self.position + 1) :]
-
- def at_symbol(self, symbol):
- return self.next == symbol
-
- def replace(self, **kwargs):
- return self._replace(**kwargs)
-
- def __str__(self):
- la = ", " + str(self.lookahead) if self.lookahead != () else ""
- return "{name} -> {bits}{lookahead}".format(
- name=self.name,
- bits=" ".join(
- ["* " + sym if i == self.position else sym for i, sym in enumerate(self.symbols)]
- )
- + (" *" if self.at_end else ""),
- lookahead=la,
- )
-
-
-class GenerateLR0(object):
- """Generate parser tables for an LR0 parser.
-
- The input grammars are of the form:
-
- grammar_simple = [
- ('E', ['E', '+', 'T']),
- ('E', ['T']),
- ('T', ['(', 'E', ')']),
- ('T', ['id']),
- ]
-
- Which is to say, they are a list of productions. Each production is a
- tuple where the first element of the tuple is the name of the
- non-terminal being added, and the second elment of the tuple is the
- list of terminals and non-terminals that make up the production.
-
- There is currently no support for custom actions or alternation or
- anything like that. If you want alternations that you'll have to lower
- the grammar by hand into the simpler form first.
-
- Don't name anything with double-underscores; those are reserved for
- the generator. Don't add '$' either, as it is reserved to mean
- end-of-stream. Use an empty list to indicate nullability, that is:
-
- ('O', []),
-
- means that O can be matched with nothing.
-
- Implementation notes:
- - This is implemented in the dumbest way possible, in order to be the
- most understandable it can be. I built this to learn, and I want to
- make sure I can keep learning with it.
-
- - We tend to use tuples everywhere. This is because tuples can be
- compared for equality and put into tables and all that jazz. They might
- be a little bit slower in places but like I said, this is for
- learning. (Also, if we need this to run faster we can probably go a
- long way by memoizing results, which is much easier if we have tuples
- everywhere.)
- """
-
- def __init__(self, start, grammar):
- """Initialize the parser generator with the specified grammar and
- start symbol.
- """
- # We always store the "augmented" grammar, which contains an initial
- # production for the start state. grammar[0] is always the start
- # rule, and in the set of states and table and whatever the first
- # element is always the starting state/position.
- self.grammar = [("__start", [start])] + grammar
- self.nonterminals = {rule[0] for rule in grammar}
- self.terminals = {
- sym for name, symbols in grammar for sym in symbols if sym not in self.nonterminals
- }
- self.alphabet = self.terminals | self.nonterminals
-
- # Check to make sure they didn't use anything that will give us
- # heartburn later.
- reserved = [a for a in self.alphabet if a.startswith("__") or a == "$"]
- if reserved:
- raise ValueError(
- "Can't use {symbols} in grammars, {what} reserved.".format(
- symbols=" or ".join(reserved),
- what="it's" if len(reserved) == 1 else "they're",
- )
- )
-
- self.terminals.add("$")
- self.alphabet.add("$")
-
- def gen_closure_next(self, config):
- """Return the next set of configurations in the closure for
- config.
-
- If the position for config is just before a non-terminal, then the
- next set of configurations is configurations for all of the
- productions for that non-terminal, with the position at the
- beginning. (If the position for config is just before a terminal,
- or at the end of the production, then the next set is empty.)
- """
- if config.at_end:
- return ()
- else:
- return tuple(
- Configuration.from_rule(rule) for rule in self.grammar if rule[0] == config.next
- )
-
- def gen_closure(self, config, closure):
- """Compute the closure for the specified config and unify it with the
- existing closure.
-
- If the provided config is already in the closure then nothing is
- done. (We assume that the closure of the config is *also* already in
- the closure.)
- """
- if config in closure:
- return closure
- else:
- new_closure = tuple(closure) + (config,)
- for next_config in self.gen_closure_next(config):
- new_closure = self.gen_closure(next_config, new_closure)
- return new_closure
-
- def gen_successor(self, config_set, symbol):
- """Compute the successor state for the given config set and the
- given symbol.
-
- The successor represents the next state of the parser after seeing
- the symbol.
- """
- seeds = [
- config.replace(position=config.position + 1)
- for config in config_set
- if config.at_symbol(symbol)
- ]
-
- closure = ()
- for seed in seeds:
- closure = self.gen_closure(seed, closure)
-
- return closure
-
- def gen_all_successors(self, config_set):
- """Return all of the non-empty successors for the given config set."""
- next = []
- for symbol in self.alphabet:
- successor = self.gen_successor(config_set, symbol)
- if len(successor) > 0:
- next.append(successor)
-
- return tuple(next)
-
- def gen_sets(self, config_set, F):
- """Recursively generate all configuration sets starting from the
- provided set, and merge them with the provided set 'F'.
- """
- if config_set in F:
- return F
- else:
- new_F = F + (config_set,)
- for successor in self.gen_all_successors(config_set):
- new_F = self.gen_sets(successor, new_F)
-
- return new_F
-
- def gen_all_sets(self):
- """Generate all of the configuration sets for the grammar."""
- initial_set = self.gen_closure(
- Configuration.from_rule(self.grammar[0]),
- (),
- )
- return self.gen_sets(initial_set, ())
-
- def find_set_index(self, sets, set):
- """Find the specified set in the set of sets, and return the
- index, or None if it is not found.
- """
- for i, s in enumerate(sets):
- if s == set:
- return i
- return None
-
- def gen_reduce_set(self, config):
- """Return the set of symbols that indicate we should reduce the given
- configuration.
-
- In an LR0 parser, this is just the set of all terminals."""
- return self.terminals
-
- def gen_table(self):
- """Generate the parse table.
-
- The parse table is a list of states. The first state in the list is
- the starting state. Each state is a dictionary that maps a symbol to an
- action. Each action is a tuple. The first element of the tuple is a
- string describing what to do:
-
- - 'shift': The second element of the tuple is the state
- number. Consume the input and push that state onto the stack.
-
- - 'reduce': The second element is the name of the non-terminal being
- reduced, and the third element is the number of states to remove
- from the stack. Don't consume the input; just remove the specified
- number of things from the stack, and then consult the table again,
- this time using the new top-of-stack as the current state and the
- name of the non-terminal to find out what to do.
-
- - 'goto': The second element is the state number to push onto the
- stack. In the literature, these entries are treated distinctly from
- the actions, but we mix them here because they never overlap with the
- other actions. (These are always associated with non-terminals, and
- the other actions are always associated with terminals.)
-
- - 'accept': Accept the result of the parse, it worked.
-
- Anything missing from the row indicates an error.
- """
- action_table = []
- config_sets = self.gen_all_sets()
- for config_set in config_sets:
- actions = {}
-
- # Actions
- for config in config_set:
- if config.at_end:
- if config.name != "__start":
- for a in self.gen_reduce_set(config):
- self.set_table_action(
- actions,
- a,
- ("reduce", config.name, len(config.symbols)),
- config,
- )
- else:
- self.set_table_action(
- actions,
- "$",
- ("accept",),
- config,
- )
-
- else:
- if config.next in self.terminals:
- successor = self.gen_successor(config_set, config.next)
- index = self.find_set_index(config_sets, successor)
- self.set_table_action(
- actions,
- config.next,
- ("shift", index),
- config,
- )
-
- # Gotos
- for symbol in self.nonterminals:
- successor = self.gen_successor(config_set, symbol)
- index = self.find_set_index(config_sets, successor)
- if index is not None:
- self.set_table_action(
- actions,
- symbol,
- ("goto", index),
- None,
- )
-
- # set_table_action stores the configs that generated the actions in
- # the table, for diagnostic purposes. This filters them out again
- # so that the parser has something clean to work with.
- actions = {k: self.get_table_action(actions, k) for k in actions}
- action_table.append(actions)
-
- return action_table
-
- def set_table_action(self, row, symbol, action, config):
- """Set the action for 'symbol' in the table row to 'action'.
-
- This is destructive; it changes the table. It raises an error if
- there is already an action for the symbol in the row.
- """
- existing, existing_config = row.get(symbol, (None, None))
- if existing is not None and existing != action:
- config_old = str(existing_config)
- config_new = str(config)
- max_len = max(len(config_old), len(config_new)) + 1
- error = (
- "Conflicting actions for token '{symbol}':\n"
- " {config_old: <{max_len}}: {old}\n"
- " {config_new: <{max_len}}: {new}\n".format(
- config_old=config_old,
- config_new=config_new,
- max_len=max_len,
- old=existing,
- new=action,
- symbol=symbol,
- )
- )
- raise ValueError(error)
- row[symbol] = (action, config)
-
- def get_table_action(self, row, symbol):
- return row[symbol][0]
-
-
-def parse(table, input, trace=False):
- """Parse the input with the generated parsing table and return the
- concrete syntax tree.
-
- The parsing table can be generated by GenerateLR0.gen_table() or by any
- of the other generators below. The parsing mechanism never changes, only
- the table generation mechanism.
-
- input is a list of tokens. Don't stick an end-of-stream marker, I'll stick
- one on for you.
- """
- assert "$" not in input
- input = input + ["$"]
- input_index = 0
-
- # Our stack is a stack of tuples, where the first entry is the state number
- # and the second entry is the 'value' that was generated when the state was
- # pushed.
- stack = [(0, None)]
- while True:
- current_state = stack[-1][0]
- current_token = input[input_index]
-
- action = table[current_state].get(current_token, ("error",))
- if trace:
- print(
- "{stack: <20} {input: <50} {action: <5}".format(
- stack=repr([s[0] for s in stack]),
- input=repr(input[input_index:]),
- action=repr(action),
- )
- )
-
- if action[0] == "accept":
- return stack[-1][1]
-
- elif action[0] == "reduce":
- name = action[1]
- size = action[2]
-
- value = (name, tuple(s[1] for s in stack[-size:]))
- stack = stack[:-size]
-
- goto = table[stack[-1][0]].get(name, ("error",))
- assert goto[0] == "goto" # Corrupt table?
- stack.append((goto[1], value))
-
- elif action[0] == "shift":
- stack.append((action[1], (current_token, ())))
- input_index += 1
-
- elif action[0] == "error":
- raise ValueError(
- "Syntax error: unexpected symbol {sym}".format(
- sym=current_token,
- ),
- )
-
-
-###############################################################################
-# SLR(1)
-###############################################################################
-class GenerateSLR1(GenerateLR0):
- """Generate parse tables for SLR1 grammars.
-
- SLR1 parsers can recognize more than LR0 parsers, because they have a
- little bit more information: instead of generating reduce actions for a
- production on all possible inputs, as LR0 parsers do, they generate
- reduce actions only for inputs that are in the 'follow' set of the
- non-terminal.
-
- That means SLR1 parsers need to know how to generate 'follow(A)', which
- means they need to know how to generate 'first(A)', which is most of the
- code in this class.
- """
-
- def gen_first_symbol(self, symbol, visited):
- """Compute the first set for a single symbol.
-
- If a symbol can be empty, then the set contains epsilon, which we
- represent as python's `None`.
-
- The first set is the set of tokens that can appear as the first token
- for a given symbol. (Obviously, if the symbol is itself a token, then
- this is trivial.)
-
- 'visited' is a set of already visited symbols, to stop infinite
- recursion on left-recursive grammars. That means that sometimes this
- function can return an empty tuple. Don't confuse that with a tuple
- containing epsilon: that's a tuple containing `None`, not an empty
- tuple.
- """
- if symbol in self.terminals:
- return (symbol,)
- elif symbol in visited:
- return ()
- else:
- assert symbol in self.nonterminals
- visited.add(symbol)
-
- # All the firsts from all the productions.
- firsts = [
- self.gen_first(rule[1], visited) for rule in self.grammar if rule[0] == symbol
- ]
-
- result = ()
- for fs in firsts:
- result = result + tuple(f for f in fs if f not in result)
-
- return tuple(sorted(result))
-
- def gen_first(self, symbols, visited=None):
- """Compute the first set for a sequence of symbols.
-
- The first set is the set of tokens that can appear as the first token
- for this sequence of symbols. The interesting wrinkle in computing the
- first set for a sequence of symbols is that we keep computing the first
- sets so long as epsilon appears in the set. i.e., if we are computing
- for ['A', 'B', 'C'] and the first set of 'A' contains epsilon, then the
- first set for the *sequence* also contains the first set of ['B', 'C'],
- since 'A' could be missing entirely.
-
- An epsilon in the result is indicated by 'None'. There will always be
- at least one element in the result.
-
- The 'visited' parameter, if not None, is a set of symbols that are
- already in the process of being evaluated, to deal with left-recursive
- grammars. (See gen_first_symbol for more.)
- """
- if len(symbols) == 0:
- return (None,) # Epsilon.
- else:
- if visited is None:
- visited = set()
- result = self.gen_first_symbol(symbols[0], visited)
- if None in result:
- result = tuple(s for s in result if s is not None)
- result = result + self.gen_first(symbols[1:], visited)
- result = tuple(sorted(set(result)))
- return result
-
- def gen_follow(self, symbol, visited=None):
- """Generate the follow set for the given nonterminal.
-
- The follow set for a nonterminal is the set of terminals that can
- follow the nonterminal in a valid sentence. The resulting set never
- contains epsilon and is never empty, since we should always at least
- ground out at '$', which is the end-of-stream marker.
- """
- if symbol == "__start":
- return tuple("$")
-
- assert symbol in self.nonterminals
-
- # Deal with left-recursion.
- if visited is None:
- visited = set()
- if symbol in visited:
- return ()
- visited.add(symbol)
-
- follow = ()
- for production in self.grammar:
- for index, prod_symbol in enumerate(production[1]):
- if prod_symbol != symbol:
- continue
-
- first = self.gen_first(production[1][index + 1 :])
- follow = follow + tuple(f for f in first if f is not None)
- if None in first:
- follow = follow + self.gen_follow(production[0], visited)
-
- assert None not in follow # Should always ground out at __start
- return follow
-
- def gen_reduce_set(self, config):
- """Return the set of symbols that indicate we should reduce the given
- config.
-
- In an SLR1 parser, this is the follow set of the config nonterminal."""
- return self.gen_follow(config.name)
-
-
-class GenerateLR1(GenerateSLR1):
- """Generate parse tables for LR1, or "canonical LR" grammars.
-
- LR1 parsers can recognize more than SLR parsers. Like SLR parsers, they
- are choosier about when they reduce. But unlike SLR parsers, they specify
- the terminals on which they reduce by carrying a 'lookahead' terminal in
- the configuration. The lookahead of a configuration is computed as the
- closure of a configuration set is computed, so see gen_closure_next for
- details. (Except for the start configuration, which has '$' as its
- lookahead.)
- """
-
- def gen_reduce_set(self, config):
- """Return the set of symbols that indicate we should reduce the given
- config.
-
- In an LR1 parser, this is the lookahead of the configuration."""
- return config.lookahead
-
- def gen_closure_next(self, config):
- """Return the next set of configurations in the closure for
- config.
-
- In LR1 parsers, we must compute the lookahead for the configurations
- we're adding to the closure. The lookahead for the new configurations
- is the first() of the rest of this config's production. If that
- contains epsilon, then the lookahead *also* contains the lookahead we
- already have. (This lookahead was presumably generated by the same
- process, so in some sense it is a 'parent' lookahead, or a lookahead
- from an upstream production in the grammar.)
-
- (See the documentation in GenerateLR0 for more information on how
- this function fits into the whole process.)
- """
- if config.at_end:
- return ()
- else:
- next = []
- for rule in self.grammar:
- if rule[0] != config.next:
- continue
-
- # N.B.: We can't just append config.lookahead to config.rest
- # and compute first(), because lookahead is a *set*. So
- # in this case we just say if 'first' contains epsilon,
- # then we need to remove the epsilon and union with the
- # existing lookahead.
- lookahead = self.gen_first(config.rest)
- if None in lookahead:
- lookahead = tuple(l for l in lookahead if l is not None)
- lookahead = lookahead + config.lookahead
- lookahead = tuple(sorted(set(lookahead)))
- next.append(Configuration.from_rule(rule, lookahead=lookahead))
-
- return tuple(next)
-
- def gen_all_sets(self):
- """Generate all of the configuration sets for the grammar.
-
- In LR1 parsers, we must remember to set the lookahead of the start
- symbol to '$'.
- """
- initial_set = self.gen_closure(
- Configuration.from_rule(self.grammar[0], lookahead=("$",)),
- (),
- )
- return self.gen_sets(initial_set, ())
-
-
-class GenerateLALR(GenerateLR1):
- """Generate tables for LALR.
-
- LALR is smaller than LR(1) but bigger than SLR(1). It works by generating
- the LR(1) configuration sets, but merging configuration sets which are
- equal in everything but their lookaheads. This works in that it doesn't
- generate any shift/reduce conflicts that weren't already in the LR(1)
- grammar. It can, however, introduce new reduce/reduce conflicts, because
- it does lose information. The advantage is that the number of parser
- states is much much smaller in LALR than in LR(1).
-
- (Note that because we use immutable state everywhere this generator does
- a lot of copying and allocation.)
- """
-
- def merge_sets(self, config_set_a, config_set_b):
- """Merge the two config sets, by keeping the item cores but merging
- the lookahead sets for each item.
- """
- assert len(config_set_a) == len(config_set_b)
- merged = []
- for index, a in enumerate(config_set_a):
- b = config_set_b[index]
- assert a.replace(lookahead=()) == b.replace(lookahead=())
-
- new_lookahead = a.lookahead + b.lookahead
- new_lookahead = tuple(sorted(set(new_lookahead)))
- merged.append(a.replace(lookahead=new_lookahead))
-
- return tuple(merged)
-
- def sets_equal(self, a, b):
- a_no_la = tuple(s.replace(lookahead=()) for s in a)
- b_no_la = tuple(s.replace(lookahead=()) for s in b)
- return a_no_la == b_no_la
-
- def gen_sets(self, config_set, F):
- """Recursively generate all configuration sets starting from the
- provided set, and merge them with the provided set 'F'.
-
- The difference between this method and the one in GenerateLR0, where
- this comes from, is in the part that stops recursion. In LALR we
- compare for set equality *ignoring lookahead*. If we find a match,
- then instead of returning F unchanged, we merge the two equal sets
- and replace the set in F, returning the modified set.
- """
- config_set_no_la = tuple(s.replace(lookahead=()) for s in config_set)
- for index, existing in enumerate(F):
- existing_no_la = tuple(s.replace(lookahead=()) for s in existing)
- if config_set_no_la == existing_no_la:
- merged_set = self.merge_sets(config_set, existing)
- return F[:index] + (merged_set,) + F[index + 1 :]
-
- # No merge candidate found, proceed.
- new_F = F + (config_set,)
- for successor in self.gen_all_successors(config_set):
- new_F = self.gen_sets(successor, new_F)
-
- return new_F
-
- def find_set_index(self, sets, set):
- """Find the specified set in the set of sets, and return the
- index, or None if it is not found.
- """
- for i, s in enumerate(sets):
- if self.sets_equal(s, set):
- return i
- return None
-
-
-###############################################################################
-# Formatting
-###############################################################################
-def format_node(node):
- """Print out an indented concrete syntax tree, from parse()."""
- lines = ["{name}".format(name=node[0])] + [
- " " + line for child in node[1] for line in format_node(child).split("\n")
- ]
- return "\n".join(lines)
-
-
-def format_table(generator, table):
- """Format a parser table so pretty."""
-
- def format_action(state, terminal):
- action = state.get(terminal, ("error",))
- if action[0] == "accept":
- return "accept"
- elif action[0] == "shift":
- return "s" + str(action[1])
- elif action[0] == "error":
- return ""
- elif action[0] == "reduce":
- return "r" + str(action[1])
-
- header = " | {terms} | {nts}".format(
- terms=" ".join("{0: <6}".format(terminal) for terminal in sorted(generator.terminals)),
- nts=" ".join("{0: <5}".format(nt) for nt in sorted(generator.nonterminals)),
- )
-
- lines = [
- header,
- "-" * len(header),
- ] + [
- "{index: <3} | {actions} | {gotos}".format(
- index=i,
- actions=" ".join(
- "{0: <6}".format(format_action(row, terminal))
- for terminal in sorted(generator.terminals)
- ),
- gotos=" ".join(
- "{0: <5}".format(row.get(nt, ("error", ""))[1])
- for nt in sorted(generator.nonterminals)
- ),
- )
- for i, row in enumerate(table)
- ]
- return "\n".join(lines)
-
-
-###############################################################################
-# Examples
-###############################################################################
-# OK, this is a very simple LR0 grammar.
-grammar_simple = [
- ("E", ["E", "+", "T"]),
- ("E", ["T"]),
- ("T", ["(", "E", ")"]),
- ("T", ["id"]),
-]
-
-gen = GenerateLR0("E", grammar_simple)
-table = gen.gen_table()
-tree = parse(table, ["id", "+", "(", "id", ")"])
-print(format_node(tree) + "\n")
-print()
-
-# This one doesn't work with LR0, though, it has a shift/reduce conflict.
-grammar_lr0_shift_reduce = grammar_simple + [
- ("T", ["id", "[", "E", "]"]),
-]
-try:
- gen = GenerateLR0("E", grammar_lr0_shift_reduce)
- table = gen.gen_table()
- assert False
-except ValueError as e:
- print(e)
-print()
-
-# Nor does this: it has a reduce/reduce conflict.
-grammar_lr0_reduce_reduce = grammar_simple + [
- ("E", ["V", "=", "E"]),
- ("V", ["id"]),
-]
-try:
- gen = GenerateLR0("E", grammar_lr0_reduce_reduce)
- table = gen.gen_table()
- assert False
-except ValueError as e:
- print(e)
-print()
-
-# Nullable symbols just don't work with constructs like this, because you can't
-# look ahead to figure out if you should reduce an empty 'F' or not.
-grammar_nullable = [
- ("E", ["F", "boop"]),
- ("F", ["beep"]),
- ("F", []),
-]
-try:
- gen = GenerateLR0("E", grammar_nullable)
- table = gen.gen_table()
- assert False
-except ValueError as e:
- print(e)
-
-gen = GenerateSLR1("E", grammar_lr0_shift_reduce)
-print("First: {first}".format(first=str(gen.gen_first(["E"]))))
-print("Follow: {follow}".format(follow=str(gen.gen_follow("E"))))
-table = gen.gen_table()
-print(format_table(gen, table))
-tree = parse(table, ["id", "+", "(", "id", "[", "id", "]", ")"])
-print(format_node(tree) + "\n")
-print()
-
-# SLR1 can't handle this.
-grammar_aho_ullman_1 = [
- ("S", ["L", "=", "R"]),
- ("S", ["R"]),
- ("L", ["*", "R"]),
- ("L", ["id"]),
- ("R", ["L"]),
-]
-try:
- gen = GenerateSLR1("S", grammar_aho_ullman_1)
- table = gen.gen_table()
- assert False
-except ValueError as e:
- print(e)
-print()
-
-# Here's an example with a full LR1 grammar, though.
-grammar_aho_ullman_2 = [
- ("S", ["X", "X"]),
- ("X", ["a", "X"]),
- ("X", ["b"]),
-]
-gen = GenerateLR1("S", grammar_aho_ullman_2)
-table = gen.gen_table()
-print(format_table(gen, table))
-parse(table, ["b", "a", "a", "b"], trace=True)
-print()
-
-# What happens if we do LALR to it?
-gen = GenerateLALR("S", grammar_aho_ullman_2)
-table = gen.gen_table()
-print(format_table(gen, table))
-print()
-
-# A fun LALAR grammar.
-grammar_lalr = [
- ("S", ["V", "E"]),
- ("E", ["F"]),
- ("E", ["E", "+", "F"]),
- ("F", ["V"]),
- ("F", ["int"]),
- ("F", ["(", "E", ")"]),
- ("V", ["id"]),
-]
-gen = GenerateLALR("S", grammar_lalr)
-table = gen.gen_table()
-print(format_table(gen, table))
-print()
diff --git a/parser.py b/parser.py
index 8091fb7..656ef09 100644
--- a/parser.py
+++ b/parser.py
@@ -1,124 +1,18 @@
-"""This is a small helper library to generate LR parser tables.
+"""A collection of LR parser generators, from LR0 through LALR.
-The primary inspiration for this library is tree-sitter, which also generates
-LR parsers for grammars written in a turing-complete language. Like that, we
-write grammars in a language, only we do it in Python instead of JavaScript.
+One day I read a tweet, asking for a tool which accepted a grammar and an
+input file and which then produced simple parsed output, without any kind of
+in-between. (There was other ranty stuff about how none of the existing tools
+really worked, but that was beside the point.)
-Why Python? Because Python 3 is widely pre-installed on MacOS and Linux. This
-library requires nothing more than the basic standard library, and not even a
-new version of it. Therefore, it turns out to be a pretty light dependency for
-a rust or C++ or something kind of project. (Tree-sitter, on the other hand,
-requires node, which is a far less stable and available runtime in 2024.)
+Upon reading the tweet, it occurred to me that I didn't know how LR parsers
+worked and how they were generated, except in the broadest of terms. Thus, I
+set about writing this, learning as I went.
-The parser tables can really be used to power anything. I prefer to make
-concrete syntax trees (again, see tree-sitter), and there is no facility at all
-for actions or custom ASTs or whatnot. Any such processing needs to be done by
-the thing that processes the tables.
-
-## Making Grammars
-
-To get started, create a grammar that derives from the `Grammar` class. Create
-one method per nonterminal, decorated with the `rule` decorator. Here's an
-example:
-
- PLUS = Token('+')
- LPAREN = Token('(')
- RPAREN = Token(')')
- ID = Token('id')
-
- class SimpleGrammar(Grammar):
- @rule
- def expression(self):
- return seq(self.expression, PLUS, self.term) | self.term
-
- @rule
- def term(self):
- return seq(LPAREN, self.expression, RPAREN) | ID
-
-
-## Using grammars
-
-TODO
-
-## Representation Choices
-
-The SimpleGrammar class might seem a little verbose compared to a dense
-structure like:
-
- grammar_simple = [
- ('E', ['E', '+', 'T']),
- ('E', ['T']),
- ('T', ['(', 'E', ')']),
- ('T', ['id']),
- ]
-
-or
-
- grammar_simple = {
- 'E': [
- ['E', '+', 'T'],
- ['T'],
- ],
- 'T': [
- ['(', 'E', ')'],
- ['id'],
- ],
- }
-
-
-The advantage that the class has over a table like this is that you get to have
-all of your Python tools help you make sure your grammar is good, if you want
-them. e.g., if you're working with an LSP or something, the members give you
-autocomplete and jump-to-definition and possibly even type-checking.
-
-At the very least, if you mis-type the name of a nonterminal, or forget to
-implement it, we will immediately raise an error that *INCLUDES THE LOCATION IN
-THE SOURCE WHERE THE ERROR WAS MADE.* With tables, we can tell you that you
-made a mistake but it's up to you to figure out where you did it.
-
-### Aside: What about a custom DSL/EBNF like thing?
-
-Yeah, OK, there's a rich history of writing your grammar in a domain-specific
-language. YACC did it, ANTLR does it, GRMTools.... just about everybody except
-Tree-Sitter does this.
-
-But look, I've got several reasons for not doing it.
-
-First, I'm lazy, and don't want to write yet another parser for my parser. What
-tools should I use to write my parser generator parser? I guess I don't have my
-parser generator parser yet, so probably a hand-written top down parser? Some
-other python parser generator? Ugh!
-
-As an add-on to that, if I make my own format then I need to make tooling for
-*that* too: syntax highlighters, jump to definition, the works. Yuck. An
-existing language, and a format that builds on an existing language, gets me the
-tooling that comes along with that language. If you can leverage that
-effictively (and I think I have) then you start way ahead in terms of tooling.
-
-Second, this whole thing is supposed to be easy to include in an existing
-project, and adding a custom compiler doesn't seem to be that. Adding two python
-files seems to be about the right speed.
-
-Thirdly, and this is just hypothetical, it's probably pretty easy to write your
-own tooling around a grammar if it's already in Python. If you want to make
-railroad diagrams or EBNF pictures or whatever, all the productions are already
-right there in data structures for you to process. I've tried to keep them
-accessible and at least somewhat easy to work with. There's nothing that says a
-DSL-based system *has* to produce unusable intermediate data- certainly there
-are some tools that *try*- but with this approach the accessibility and the
-ergonomics of the tool go hand in hand.
-
-## Some History
-
-The first version of this code was written as an idle exercise to learn how LR
-parser table generation even worked. It was... very simple, fairly easy to
-follow, and just *incredibly* slow. Like, mind-bogglingly slow. Unusably slow
-for anything but the most trivial grammar.
-
-As a result, when I decided I wanted to use it for a larger grammar, I found that
-I just couldn't. So this has been hacked and significantly improved from that
-version, now capable of building tables for nontrivial grammars. It could still
-be a lot faster, but it meets my needs for now.
+This code is not written to be fast, or even efficient, although it runs its
+test cases fast enough. It was instead written to be easy to follow along
+with, so that when I forget how all this works I can come back to the code
+and read along and learn all over again.
(BTW, the notes I read to learn how all this works are at
http://dragonbook.stanford.edu/lecture-notes/Stanford-CS143/. Specifically,
@@ -126,17 +20,12 @@ I started with handout 8, 'Bottom-up-parsing', and went from there. (I did
eventually have to backtrack a little into handout 7, since that's where
First() and Follow() are covered.)
-May 2024
-"""
+Enjoy!
-import abc
-import collections
-import dataclasses
-import enum
-import functools
-import inspect
-import sys
-import typing
+doty
+2016-12-09
+"""
+from collections import namedtuple
###############################################################################
@@ -144,624 +33,132 @@ import typing
#
# We start with LR0 parsers, because they form the basis of everything else.
###############################################################################
-class Configuration:
- """A rule being tracked in a state. That is, a specific position within a
- specific rule, with an associated lookahead state.
-
- We make a *lot* of these and we need/want to pre-cache a ton of things we
- ask about so we need to override __init__, otherwise it's immutable and
- fixed and doesn't have a dict to save space.
-
- It also supports hashing and equality and comparison, so it can be sorted
- and whatnot. This really is the workhorse data structure of the whole thing.
- If you can improve this you can improve the performance of everything probably.
+class Configuration(
+ namedtuple('Configuration', ['name', 'symbols', 'position', 'lookahead'])
+):
+ """A rule being tracked in a state.
(Note: technically, lookahead isn't used until we get to LR(1) parsers,
but if left at its default it's harmless. Ignore it until you get to
the part about LR(1).)
"""
-
- __slots__ = (
- "name",
- "symbols",
- "position",
- "lookahead",
- "next",
- "at_end",
- "_vals",
- "_hash",
- )
-
- name: int
- symbols: typing.Tuple[int, ...]
- position: int
- lookahead: typing.Tuple[int, ...]
- next: int | None
- at_end: bool
-
- _vals: typing.Tuple
- _hash: int
-
- def __init__(self, name, symbols, position, lookahead) -> None:
- self.name = name
- self.symbols = symbols
- self.position = position
- self.lookahead = lookahead
-
- at_end = position == len(symbols)
- self.at_end = at_end
- self.next = symbols[position] if not at_end else None
-
- self._vals = (name, symbols, position, lookahead)
- self._hash = hash(self._vals)
+ __slots__ = ()
@classmethod
- def from_rule(cls, name: int, symbols: typing.Tuple[int, ...], lookahead=()):
+ def from_rule(cls, rule, lookahead=()):
return Configuration(
- name=name,
- symbols=symbols,
+ name=rule[0],
+ symbols=rule[1],
position=0,
lookahead=lookahead,
)
- def __hash__(self) -> int:
- return self._hash
+ @property
+ def at_end(self):
+ return self.position == len(self.symbols)
- def __eq__(self, value: object, /) -> bool:
- if value is self:
- return True
- if not isinstance(value, Configuration):
- return NotImplemented
-
- return (
- value._hash == self._hash
- and value.name == self.name
- and value.position == self.position
- and value.symbols == self.symbols
- and value.lookahead == self.lookahead
- )
-
- def __lt__(self, value) -> bool:
- if not isinstance(value, Configuration):
- return NotImplemented
- return self._vals < value._vals
-
- def __gt__(self, value) -> bool:
- if not isinstance(value, Configuration):
- return NotImplemented
- return self._vals > value._vals
-
- def __le__(self, value) -> bool:
- if not isinstance(value, Configuration):
- return NotImplemented
- return self._vals <= value._vals
-
- def __ge__(self, value) -> bool:
- if not isinstance(value, Configuration):
- return NotImplemented
- return self._vals >= value._vals
-
- def replace_position(self, new_position):
- return Configuration(
- name=self.name,
- symbols=self.symbols,
- position=new_position,
- lookahead=self.lookahead,
- )
-
- def clear_lookahead(self):
- return Configuration(
- name=self.name,
- symbols=self.symbols,
- position=self.position,
- lookahead=(),
- )
+ @property
+ def next(self):
+ return self.symbols[self.position] if not self.at_end else None
@property
def rest(self):
- return self.symbols[(self.position + 1) :]
+ return self.symbols[(self.position+1):]
- def format(self, alphabet: list[str]) -> str:
- la = ", " + str(tuple(alphabet[i] for i in self.lookahead)) if self.lookahead != () else ""
+ def at_symbol(self, symbol):
+ return self.next == symbol
+
+ def replace(self, **kwargs):
+ return self._replace(**kwargs)
+
+ def __str__(self):
+ la = ", " + str(self.lookahead) if self.lookahead != () else ""
return "{name} -> {bits}{lookahead}".format(
- name=alphabet[self.name],
- bits=" ".join(
- [
- "* " + alphabet[sym] if i == self.position else alphabet[sym]
- for i, sym in enumerate(self.symbols)
- ]
- )
- + (" *" if self.at_end else ""),
+ name=self.name,
+ bits=' '.join([
+ '* ' + sym if i == self.position else sym
+ for i, sym in enumerate(self.symbols)
+ ]) + (' *' if self.at_end else ''),
lookahead=la,
)
-ConfigSet = typing.Tuple[Configuration, ...]
-
-
-class ConfigurationSetInfo:
- """When we build a grammar into a table, the first thing we need to do is
- generate all the configuration sets and their successors.
-
- (A configuration set is what it sounds like: an unordered set of
- Configuration structures. But we use Tuple because it's hashable and
- immutable and small and we order the Tuples so that we get repeatable
- results.)
-
- *This* is structure that tracks the result of that computation.
-
- (Different generators vary in the details of how they generate this
- structure, but they all compute this information.)
- """
-
- config_set_key: dict[ConfigSet, int] # Map a ConfigSet into am index
- sets: list[ConfigSet] # Map the index back into a set
-
- # All the sucessors for all of the sets. `successors[i]` is the mapping
- # from grammar symbol to the index of the set you get by processing that
- # symbol.
- successors: list[dict[int, int]]
-
- def __init__(self):
- self.config_set_key = {}
- self.sets = []
- self.successors = []
-
- def register_config_set(self, c: ConfigSet) -> typing.Tuple[int, bool]:
- """Potentially add a new config set to the set of sets. Returns the
- canonical ID of the set within this structure, along with a boolean
- indicating whether the set was just added or not.
-
- (You can use this integer to get the set back, if you need it, and
- also access the successors table.)
- """
- existing = self.config_set_key.get(c)
- if existing is not None:
- return existing, False
-
- index = len(self.sets)
- self.sets.append(c)
- self.successors.append({})
- self.config_set_key[c] = index
- return index, True
-
- def add_successor(self, c_id: int, symbol: int, successor: int):
- """Register sucessor(`c_id`, `symbol`) -> `successor`, where c_id
- is the id of the set in this structure, and symbol is the id of a
- symbol in the alphabet of the grammar.
- """
- self.successors[c_id][symbol] = successor
-
- def find_path_to_set(self, target_set: ConfigSet) -> list[int]:
- """Trace the path of grammar symbols from the first set (which always
- set 0) to the target set. This is useful in conflict reporting,
- because we'll be *at* a ConfigSet and want to show the grammar symbols
- that get us to where we found the conflict.
-
- The return value is a list of grammar symbols to get to the specified
- ConfigSet.
-
- This function raises KeyError if no path is found.
- """
- target_index = self.config_set_key[target_set]
- visited = set()
-
- queue: collections.deque = collections.deque()
- queue.appendleft((0, []))
- while len(queue) > 0:
- set_index, path = queue.pop()
- if set_index == target_index:
- return path
-
- if set_index in visited:
- continue
- visited.add(set_index)
-
- for symbol, successor in self.successors[set_index].items():
- queue.appendleft((successor, path + [symbol]))
-
- raise KeyError("Unable to find a path to the target set!")
-
-
-class Assoc(enum.Enum):
- """Associativity of a rule."""
-
- NONE = 0
- LEFT = 1
- RIGHT = 2
-
-
-class ErrorCollection:
- """A collection of errors. The errors are grouped by config set and alphabet
- symbol, so that we can group the error strings appropriately when we format
- the error.
- """
-
- errors: dict[ConfigSet, dict[int, dict[Configuration, typing.Tuple]]]
-
- def __init__(self):
- self.errors = {}
-
- def any(self) -> bool:
- """Return True if there are any errors in this collection."""
- return len(self.errors) > 0
-
- def add_error(
- self,
- config_set: ConfigSet,
- symbol: int,
- config: Configuration,
- action: typing.Tuple,
- ):
- """Add an error to the collection.
-
- config_set is the set with the error.
- symbol is the symbol we saw when we saw the error.
- config is the configuration that we were in when we saw the error.
- action is what we were trying to do.
-
- (This all makes more sense from inside the TableBuilder.)
- """
- set_errors = self.errors.get(config_set)
- if set_errors is None:
- set_errors = {}
- self.errors[config_set] = set_errors
-
- symbol_errors = set_errors.get(symbol)
- if symbol_errors is None:
- symbol_errors = {}
- set_errors[symbol] = symbol_errors
-
- symbol_errors[config] = action
-
- def format(
- self,
- alphabet: list[str],
- all_sets: ConfigurationSetInfo,
- ) -> str | None:
- """Format all the errors into a string, or return None if there are no
- errors.
-
- We need the alphabet to turn all these integers into something human
- readable, and all the sets to trace a path to where the errors were
- encountered.
- """
- if len(self.errors) is None:
- return None
-
- errors = []
- for config_set, set_errors in self.errors.items():
- path = all_sets.find_path_to_set(config_set)
- path_str = " ".join(alphabet[s] for s in path)
-
- for symbol, symbol_errors in set_errors.items():
- lines = []
- lines.append(
- f"When we have parsed '{path_str}' and see '{alphabet[symbol]}' we don't know whether:"
- )
- for config, action in symbol_errors.items():
- name = alphabet[config.name]
- rule = " ".join(
- f"{'* ' if config.position == i else ''}{alphabet[s]}"
- for i, s in enumerate(config.symbols)
- )
- if config.next is None:
- rule += " *"
-
- if action[0] == "reduce":
- action_str = f"pop {action[2]} values off the stack and make a {action[1]}"
- elif action[0] == "shift":
- action_str = "consume the token and keep going"
- elif action[0] == "accept":
- action_str = "accept the parse"
- else:
- assert action[0] == "goto", f"Unknown action {action[0]}"
- raise Exception("Shouldn't conflict on goto ever")
-
- lines.append(
- f" - We are in the rule `{name}: {rule}` and we should {action_str}"
- )
-
- errors.append("\n".join(lines))
-
- return "\n\n".join(errors)
-
-
-class TableBuilder(object):
- """A helper object to assemble actions into build parse tables.
-
- This is a builder type thing: call `new_row` at the start of
- each row, then `flush` when you're done with the last row.
- """
-
- errors: ErrorCollection
- table: list[dict[str, typing.Tuple]]
- alphabet: list[str]
- precedence: typing.Tuple[typing.Tuple[Assoc, int], ...]
- row: None | list[typing.Tuple[None | typing.Tuple, None | Configuration]]
-
- def __init__(
- self,
- alphabet: list[str],
- precedence: typing.Tuple[typing.Tuple[Assoc, int], ...],
- ):
- self.errors = ErrorCollection()
- self.table = []
- self.alphabet = alphabet
- self.precedence = precedence
- self.row = None
-
- def flush(self, all_sets: ConfigurationSetInfo) -> list[dict[str, typing.Tuple]]:
- """Finish building the table and return it.
-
- Raises ValueError if there were any conflicts during construction.
- """
- self._flush_row()
- if self.errors.any():
- errors = self.errors.format(self.alphabet, all_sets)
- raise ValueError(f"Errors building the table:\n\n{errors}")
- return self.table
-
- def new_row(self, config_set: ConfigSet):
- """Start a new row, processing the given config set. Call this before
- doing anything else.
- """
- self._flush_row()
- self.row = [(None, None) for _ in self.alphabet]
- self.current_config_set = config_set
-
- def _flush_row(self):
- if self.row:
- actions = {self.alphabet[k]: v[0] for k, v in enumerate(self.row) if v[0] is not None}
- self.table.append(actions)
-
- def set_table_reduce(self, symbol: int, config: Configuration):
- """Mark a reduce of the given configuration for the given symbol in the
- current row.
- """
- action = ("reduce", self.alphabet[config.name], len(config.symbols))
- self._set_table_action(symbol, action, config)
-
- def set_table_accept(self, symbol: int, config: Configuration):
- """Mark a accept of the given configuration for the given symbol in the
- current row.
- """
- action = ("accept",)
- self._set_table_action(symbol, action, config)
-
- def set_table_shift(self, symbol: int, index: int, config: Configuration):
- """Mark a shift in the current row of the given given symbol to the
- given index. The configuration here provides debugging informtion for
- conflicts.
- """
- action = ("shift", index)
- self._set_table_action(symbol, action, config)
-
- def set_table_goto(self, symbol: int, index: int):
- """Set the goto for the given nonterminal symbol in the current row."""
- action = ("goto", index)
- self._set_table_action(symbol, action, None)
-
- def _action_precedence(self, symbol: int, action: typing.Tuple, config: Configuration):
- if action[0] == "shift":
- return self.precedence[symbol]
- else:
- return self.precedence[config.name]
-
- def _set_table_action(self, symbol_id: int, action: typing.Tuple, config: Configuration | None):
- """Set the action for 'symbol' in the table row to 'action'.
-
- This is destructive; it changes the table. It records an error if
- there is already an action for the symbol in the row.
- """
- assert isinstance(symbol_id, int)
-
- assert self.row is not None
- existing, existing_config = self.row[symbol_id]
- if existing is not None and existing != action:
- assert existing_config is not None
- assert config is not None
-
- existing_assoc, existing_prec = self._action_precedence(
- symbol_id, existing, existing_config
- )
- new_assoc, new_prec = self._action_precedence(symbol_id, action, config)
-
- if existing_prec > new_prec:
- # Precedence of the action in the table already wins, do nothing.
- return
-
- elif existing_prec == new_prec:
- # It's an actual conflict, use associativity if we can.
- # If there's a conflict in associativity then it's a real conflict!
- assoc = Assoc.NONE
- if existing_assoc == Assoc.NONE:
- assoc = new_assoc
- elif new_assoc == Assoc.NONE:
- assoc = existing_assoc
- elif new_assoc == existing_assoc:
- assoc = new_assoc
-
- resolved = False
- if assoc == Assoc.LEFT:
- # Prefer reduce over shift
- if action[0] == "shift" and existing[0] == "reduce":
- action = existing
- resolved = True
- elif action[0] == "reduce" and existing[0] == "shift":
- resolved = True
-
- elif assoc == Assoc.RIGHT:
- # Prefer shift over reduce
- if action[0] == "shift" and existing[0] == "reduce":
- resolved = True
- elif action[0] == "reduce" and existing[0] == "shift":
- action = existing
- resolved = True
-
- if not resolved:
- # Record the conflicts.
- self.errors.add_error(
- self.current_config_set, symbol_id, existing_config, existing
- )
- self.errors.add_error(self.current_config_set, symbol_id, config, action)
-
- else:
- # Precedence of the new action is greater than the existing
- # action, just allow the overwrite with no change.
- pass
-
- self.row[symbol_id] = (action, config)
-
-
class GenerateLR0(object):
- """Generate parser tables for an LR0 parser."""
+ """Generate parser tables for an LR0 parser.
- # Internally we use integers as symbols, not strings. Mostly this is fine,
- # but when we need to map back from integer to string we index this list.
- alphabet: list[str]
+ The input grammars are of the form:
- # The grammar we work with. The outer list is indexed by grammar symbol,
- # terminal *and* non-terminal. The inner list is the list of productions
- # for the given nonterminal symbol. (If you have a terminal `t` and look it
- # up you'll just get an empty list.)
- grammar: list[list[typing.Tuple[int, ...]]]
+ grammar_simple = [
+ ('E', ['E', '+', 'T']),
+ ('E', ['T']),
+ ('T', ['(', 'E', ')']),
+ ('T', ['id']),
+ ]
- # nonterminal[i] is True if alphabet[i] is a nonterminal.
- nonterminal: typing.Tuple[bool, ...]
- # The complement of nonterminal. terminal[i] is True if alphabet[i] is a
- # terminal.
- terminal: typing.Tuple[bool, ...]
+ Which is to say, they are a list of productions. Each production is a
+ tuple where the first element of the tuple is the name of the
+ non-terminal being added, and the second element of the tuple is the
+ list of terminals and non-terminals that make up the production.
- # The precedence of every symbol. If no precedence was explicitly provided
- # for a symbol, then its entry in this tuple will be (NONE, 0).
- precedence: typing.Tuple[typing.Tuple[Assoc, int], ...]
+ There is currently no support for custom actions or alternation or
+ anything like that. If you want alternations then you'll have to lower
+ the grammar by hand into the simpler form first.
- # The lookup that maps a particular symbol to an integer. (Only really used
- # for debugging.)
- symbol_key: dict[str, int]
- # The start symbol of the grammar.
- start_symbol: int
- # The end symbol of the grammar.
- end_symbol: int
+ Don't name anything with double-underscores; those are reserved for
+ the generator. Don't add '$' either, as it is reserved to mean
+ end-of-stream. Use an empty list to indicate nullability, that is:
- config_sets_key: dict[ConfigSet, int]
- successors: list[set[int]]
+ ('O', []),
- def __init__(
- self,
- start: str,
- grammar: list[typing.Tuple[str, list[str]]],
- precedence: None | dict[str, typing.Tuple[Assoc, int]] = None,
- ):
+ means that O can be matched with nothing.
+
+ Implementation notes:
+ - This is implemented in the dumbest way possible, in order to be the
+ most understandable it can be. I built this to learn, and I want to
+ make sure I can keep learning with it.
+
+ - We tend to use tuples everywhere. This is because tuples can be
+ compared for equality and put into tables and all that jazz. They might
+ be a little bit slower in places but like I said, this is for
+ learning. (Also, if we need this to run faster we can probably go a
+ long way by memoizing results, which is much easier if we have tuples
+ everywhere.)
+ """
+ def __init__(self, start, grammar):
"""Initialize the parser generator with the specified grammar and
start symbol.
-
- The input grammars are of the form:
-
- grammar_simple = [
- ('E', ['E', '+', 'T']),
- ('E', ['T']),
- ('T', ['(', 'E', ')']),
- ('T', ['id']),
- ]
-
- Which is to say, they are a list of productions. Each production is a
- tuple where the first element of the tuple is the name of the
- non-terminal being added, and the second elment of the tuple is the
- list of terminals and non-terminals that make up the production.
-
- There is currently no support for custom actions or alternation or
- anything like that. If you want alternations that you'll have to lower
- the grammar by hand into the simpler form first.
-
- Don't name anything with double-underscores; those are reserved for
- the generator. Don't add '$' either, as it is reserved to mean
- end-of-stream. Use an empty list to indicate nullability, that is:
-
- ('O', []),
-
- means that O can be matched with nothing.
-
- This isn't a *great* way to author these things, but it is very simple
- and flexible. You probably don't want to author this on your own; see
- the Grammar class for a high-level API.
-
- The precedence dictionary, if provided, maps a given symbol to an
- associativity and a precedence. Any symbol not in the dictionary is
- presumed to have an associativity of NONE and a precedence of zero.
"""
-
- # Work out the alphabet.
- alphabet = set()
- for name, rule in grammar:
- alphabet.add(name)
- alphabet.update(symbol for symbol in rule)
+ # We always store the "augmented" grammar, which contains an initial
+ # production for the start state. grammar[0] is always the start
+ # rule, and in the set of states and table and whatever the first
+ # element is always the starting state/position.
+ self.grammar = [('__start', [start])] + grammar
+ self.nonterminals = {rule[0] for rule in grammar}
+ self.terminals = {
+ sym
+ for name, symbols in grammar
+ for sym in symbols
+ if sym not in self.nonterminals
+ }
+ self.alphabet = self.terminals | self.nonterminals
# Check to make sure they didn't use anything that will give us
# heartburn later.
- reserved = [a for a in alphabet if a.startswith("__") or a == "$"]
+ reserved = [a for a in self.alphabet if a.startswith('__') or a == '$']
if reserved:
raise ValueError(
"Can't use {symbols} in grammars, {what} reserved.".format(
- symbols=" or ".join(reserved),
+ symbols=' or '.join(reserved),
what="it's" if len(reserved) == 1 else "they're",
)
)
- alphabet.add("__start")
- alphabet.add("$")
- self.alphabet = list(sorted(alphabet))
+ self.terminals.add('$')
+ self.alphabet.add('$')
- symbol_key = {symbol: index for index, symbol in enumerate(self.alphabet)}
-
- start_symbol = symbol_key["__start"]
- end_symbol = symbol_key["$"]
-
- assert self.alphabet[start_symbol] == "__start"
- assert self.alphabet[end_symbol] == "$"
-
- # Turn the incoming grammar into a dictionary, indexed by nonterminal.
- #
- # We count on python dictionaries retaining the insertion order, like
- # it or not.
- full_grammar: list[list] = [list() for _ in self.alphabet]
- terminal: list[bool] = [True for _ in self.alphabet]
- assert terminal[end_symbol]
-
- nonterminal = [False for _ in self.alphabet]
-
- for name, rule in grammar:
- name_symbol = symbol_key[name]
-
- terminal[name_symbol] = False
- nonterminal[name_symbol] = True
-
- rules = full_grammar[name_symbol]
- rules.append(tuple(symbol_key[symbol] for symbol in rule))
-
- self.grammar = full_grammar
- self.grammar[start_symbol].append((symbol_key[start],))
- terminal[start_symbol] = False
- nonterminal[start_symbol] = True
-
- self.terminal = tuple(terminal)
- self.nonterminal = tuple(nonterminal)
-
- assert self.terminal[end_symbol]
- assert self.nonterminal[start_symbol]
-
- if precedence is None:
- precedence = {}
- self.precedence = tuple(precedence.get(a, (Assoc.NONE, 0)) for a in self.alphabet)
-
- self.symbol_key = symbol_key
- self.start_symbol = start_symbol
- self.end_symbol = end_symbol
-
- @functools.cache
- def gen_closure_next(self, config: Configuration):
- """Return the next set of configurations in the closure for config.
+ def gen_closure_next(self, config):
+ """Return the next set of configurations in the closure for
+ config.
If the position for config is just before a non-terminal, then the
next set of configurations is configurations for all of the
@@ -769,117 +166,96 @@ class GenerateLR0(object):
beginning. (If the position for config is just before a terminal,
or at the end of the production, then the next set is empty.)
"""
- next = config.next
- if next is None:
+ if config.at_end:
return ()
else:
- return tuple(Configuration.from_rule(next, rule) for rule in self.grammar[next])
+ return tuple(
+ Configuration.from_rule(rule)
+ for rule in self.grammar
+ if rule[0] == config.next
+ )
- def gen_closure(self, seeds: typing.Iterable[Configuration]) -> ConfigSet:
- """Compute the closure for the specified configs. The closure is all
- of the configurations we could be in. Specifically, if the position
- for a config is just before a non-terminal then we must also consider
- configurations where the rule is the rule for the non-terminal and
- the position is just before the beginning of the rule.
+ def gen_closure(self, config, closure):
+ """Compute the closure for the specified config and unify it with the
+ existing closure.
- (We have replaced a recursive version with an iterative one.)
+ If the provided config is already in the closure then nothing is
+ done. (We assume that the closure of the config is *also* already in
+ the closure.)
"""
- closure = set()
- pending = list(seeds)
- pending_next = []
- while len(pending) > 0:
- for config in pending:
- if config in closure:
- continue
+ if config in closure:
+ return closure
+ else:
+ new_closure = tuple(closure) + (config,)
+ for next_config in self.gen_closure_next(config):
+ new_closure = self.gen_closure(next_config, new_closure)
+ return new_closure
- closure.add(config)
- for next_config in self.gen_closure_next(config):
- pending_next.append(next_config)
-
- temp = pending
- pending = pending_next
- pending_next = temp
- pending_next.clear()
-
- return tuple(sorted(closure)) # TODO: Why tuple?
-
- def gen_successor(self, config_set: typing.Iterable[Configuration], symbol: int) -> ConfigSet:
+ def gen_successor(self, config_set, symbol):
"""Compute the successor state for the given config set and the
given symbol.
The successor represents the next state of the parser after seeing
the symbol.
"""
- seeds = tuple(
- config.replace_position(config.position + 1)
+ seeds = [
+ config.replace(position=config.position + 1)
for config in config_set
- if config.next == symbol
- )
+ if config.at_symbol(symbol)
+ ]
+
+ closure = ()
+ for seed in seeds:
+ closure = self.gen_closure(seed, closure)
- closure = self.gen_closure(seeds)
return closure
- def gen_all_successors(
- self, config_set: typing.Iterable[Configuration]
- ) -> list[typing.Tuple[int, ConfigSet]]:
- """Return all of the non-empty successors for the given config set.
-
- (That is, given the config set, pretend we see all the symbols we
- could possibly see, and figure out which configs sets we get from
- those symbols. Those are the successors of this set.)
- """
- possible = tuple(sorted({config.next for config in config_set if config.next is not None}))
-
+ def gen_all_successors(self, config_set):
+ """Return all of the non-empty successors for the given config set."""
next = []
- for symbol in possible:
+ for symbol in self.alphabet:
successor = self.gen_successor(config_set, symbol)
if len(successor) > 0:
- next.append((symbol, successor))
+ next.append(successor)
- return next
+ return tuple(next)
- def gen_sets(self, config_set: typing.Tuple[Configuration, ...]) -> ConfigurationSetInfo:
- """Generate all configuration sets starting from the provided set."""
- result = ConfigurationSetInfo()
+ def gen_sets(self, config_set, F):
+ """Recursively generate all configuration sets starting from the
+ provided set, and merge them with the provided set 'F'.
+ """
+ if config_set in F:
+ return F
+ else:
+ new_F = F + (config_set,)
+ for successor in self.gen_all_successors(config_set):
+ new_F = self.gen_sets(successor, new_F)
- successors = []
- pending = [config_set]
- pending_next = []
- while len(pending) > 0:
- for config_set in pending:
- id, is_new = result.register_config_set(config_set)
- if is_new:
- for symbol, successor in self.gen_all_successors(config_set):
- successors.append((id, symbol, successor))
- pending_next.append(successor)
+ return new_F
- temp = pending
- pending = pending_next
- pending_next = temp
- pending_next.clear()
-
- for id, symbol, successor in successors:
- result.add_successor(id, symbol, result.config_set_key[successor])
-
- return result
-
- def gen_all_sets(self) -> ConfigurationSetInfo:
+ def gen_all_sets(self):
"""Generate all of the configuration sets for the grammar."""
- seeds = tuple(
- Configuration.from_rule(self.start_symbol, rule)
- for rule in self.grammar[self.start_symbol]
+ initial_set = self.gen_closure(
+ Configuration.from_rule(self.grammar[0]),
+ (),
)
- initial_set = self.gen_closure(seeds)
- return self.gen_sets(initial_set)
+ return self.gen_sets(initial_set, ())
- def gen_reduce_set(self, config: Configuration) -> typing.Iterable[int]:
+ def find_set_index(self, sets, set):
+ """Find the specified set in the set of sets, and return the
+ index, or None if it is not found.
+ """
+ for i, s in enumerate(sets):
+ if s == set:
+ return i
+ return None
+
+ def gen_reduce_set(self, config):
"""Return the set of symbols that indicate we should reduce the given
configuration.
- In an LR0 parser, this is just the set of all terminals.
- """
- del config
- return [index for index, value in enumerate(self.terminal) if value]
+ In an LR0 parser, this is just the set of all terminals."""
+ return self.terminals
def gen_table(self):
"""Generate the parse table.
@@ -909,32 +285,89 @@ class GenerateLR0(object):
Anything missing from the row indicates an error.
"""
+ action_table = []
config_sets = self.gen_all_sets()
- builder = TableBuilder(self.alphabet, self.precedence)
-
- for config_set_id, config_set in enumerate(config_sets.sets):
- builder.new_row(config_set)
- successors = config_sets.successors[config_set_id]
+ for config_set in config_sets:
+ actions = {}
+ # Actions
for config in config_set:
- config_next = config.next
- if config_next is None:
- if config.name != self.start_symbol:
+ if config.at_end:
+ if config.name != '__start':
for a in self.gen_reduce_set(config):
- builder.set_table_reduce(a, config)
+ self.set_table_action(
+ actions,
+ a,
+ ('reduce', config.name, len(config.symbols)),
+ config,
+ )
else:
- builder.set_table_accept(self.end_symbol, config)
+ self.set_table_action(
+ actions,
+ '$',
+ ('accept',),
+ config,
+ )
- elif self.terminal[config_next]:
- index = successors[config_next]
- builder.set_table_shift(config_next, index, config)
+ else:
+ if config.next in self.terminals:
+ successor = self.gen_successor(config_set, config.next)
+ index = self.find_set_index(config_sets, successor)
+ self.set_table_action(
+ actions,
+ config.next,
+ ('shift', index),
+ config,
+ )
# Gotos
- for symbol, index in successors.items():
- if self.nonterminal[symbol]:
- builder.set_table_goto(symbol, index)
+ for symbol in self.nonterminals:
+ successor = self.gen_successor(config_set, symbol)
+ index = self.find_set_index(config_sets, successor)
+ if index is not None:
+ self.set_table_action(
+ actions,
+ symbol,
+ ('goto', index),
+ None,
+ )
- return builder.flush(config_sets)
+ # set_table_action stores the configs that generated the actions in
+ # the table, for diagnostic purposes. This filters them out again
+ # so that the parser has something clean to work with.
+ actions = {k: self.get_table_action(actions, k) for k in actions}
+ action_table.append(actions)
+
+ return action_table
+
+ def set_table_action(self, row, symbol, action, config):
+ """Set the action for 'symbol' in the table row to 'action'.
+
+ This is destructive; it changes the table. It raises an error if
+ there is already an action for the symbol in the row.
+ """
+ existing, existing_config = row.get(symbol, (None, None))
+ if existing is not None and existing != action:
+ config_old = str(existing_config)
+ config_new = str(config)
+ max_len = max(len(config_old), len(config_new)) + 1
+ error = (
+ "Conflicting actions for token '{symbol}':\n"
+ " {config_old: <{max_len}}: {old}\n"
+ " {config_new: <{max_len}}: {new}\n".format(
+ config_old=config_old,
+ config_new=config_new,
+ max_len=max_len,
+ old=existing,
+ new=action,
+ symbol=symbol,
+ )
+ )
+ raise ValueError(error)
+ row[symbol] = (action, config)
+
+ def get_table_action(self, row, symbol):
+ return row[symbol][0]
def parse(table, input, trace=False):
@@ -947,53 +380,48 @@ def parse(table, input, trace=False):
input is a list of tokens. Don't stick an end-of-stream marker, I'll stick
one on for you.
-
- This is not a *great* parser, it's really just a demo for what you can
- do with the table.
"""
- assert "$" not in input
- input = input + ["$"]
+ assert '$' not in input
+ input = input + ['$']
input_index = 0
# Our stack is a stack of tuples, where the first entry is the state number
# and the second entry is the 'value' that was generated when the state was
# pushed.
- stack: list[typing.Tuple[int, typing.Any]] = [(0, None)]
+ stack = [(0, None)]
while True:
current_state = stack[-1][0]
current_token = input[input_index]
- action = table[current_state].get(current_token, ("error",))
+ action = table[current_state].get(current_token, ('error',))
if trace:
- print(
- "{stack: <20} {input: <50} {action: <5}".format(
- stack=repr([s[0] for s in stack]),
- input=repr(input[input_index:]),
- action=repr(action),
- )
- )
+ print("{stack: <20} {input: <50} {action: <5}".format(
+ stack=repr([s[0] for s in stack]),
+ input=repr(input[input_index:]),
+ action=repr(action)
+ ))
- if action[0] == "accept":
+ if action[0] == 'accept':
return stack[-1][1]
- elif action[0] == "reduce":
+ elif action[0] == 'reduce':
name = action[1]
size = action[2]
value = (name, tuple(s[1] for s in stack[-size:]))
stack = stack[:-size]
- goto = table[stack[-1][0]].get(name, ("error",))
- assert goto[0] == "goto" # Corrupt table?
+ goto = table[stack[-1][0]].get(name, ('error',))
+ assert goto[0] == 'goto' # Corrupt table?
stack.append((goto[1], value))
- elif action[0] == "shift":
+ elif action[0] == 'shift':
stack.append((action[1], (current_token, ())))
input_index += 1
- elif action[0] == "error":
+ elif action[0] == 'error':
raise ValueError(
- "Syntax error: unexpected symbol {sym}".format(
+ 'Syntax error: unexpected symbol {sym}'.format(
sym=current_token,
),
)
@@ -1002,228 +430,6 @@ def parse(table, input, trace=False):
###############################################################################
# SLR(1)
###############################################################################
-def update_changed(items: set[int], other: set[int]) -> bool:
- """Merge the `other` set into the `items` set, and return True if this
- changed the items set.
- """
- old_len = len(items)
- items.update(other)
- return old_len != len(items)
-
-
-@dataclasses.dataclass(frozen=True)
-class FirstInfo:
- """A structure that tracks the first set of a grammar. (Or, as it is
- commonly styled in textbooks, FIRST.)
-
- firsts[s] is the set of first terminals of any particular nonterminal s.
- (For a terminal , firsts[s] == s.)
-
- is_epsilon[s] is True if the nonterminal s can be empty, that is, if
- it can match zero symbols.
-
- For example, consider following grammar:
-
- [
- ('x', ['y', 'A']),
- ('y', ['z']),
- ('y', ['B', 'x']),
- ('y', []),
- ('z', ['C']),
- ('z', ['D', x]),
- ]
-
- For this grammar, FIRST['z'] is ('C', 'D').
-
- FIRST['y'] is ('B', 'C', 'D'). For the first production, 'z' is first, and
- since 'z' is a nonterminal we need to include all of its symbols too,
- transitively. For the second production, 'B' is first, and so that gets
- added to the set. The last production doesn't have anything in it, so it
- doesn't contribute to FIRST['y'], but it does set `is_epsilon` to True.
-
- Finally, FIRST['x'] is ('A', 'B', 'C', 'D'). ('B', 'C', 'D') comes from
- FIRST['y'], as 'y' is first in our only production. But the 'A' comes from
- the fact that is_epsilon['y'] is True: since 'y' can match empty input,
- it is also legal for 'x' to begin with 'A'.
- """
-
- firsts: list[set[int]]
- is_epsilon: list[bool]
-
- @classmethod
- def from_grammar(
- cls,
- grammar: list[list[typing.Tuple[int, ...]]],
- terminal: typing.Tuple[bool, ...],
- ) -> "FirstInfo":
- """Construct a new FirstInfo from the specified grammar.
-
- terminal[s] is True if symbol s is a terminal symbol.
- """
- # Add all terminals to their own firsts
- firsts: list[set[int]] = []
- for index, is_terminal in enumerate(terminal):
- firsts.append(set())
- if is_terminal:
- firsts[index].add(index)
-
- # Because we're working with recursive and mutually recursive rules, we
- # need to make sure we terminate once we've actually found all the first
- # symbols. Naive recursion will go forever, and recursion with a visited
- # set to halt recursion ends up revisiting the same symbols over and
- # over, running *very* slowly. Strangely, iteration to fixed-point turns
- # out to be reasonably quick in practice, and is what every other parser
- # generator uses in the end.
- epsilons = [False for _ in terminal]
- changed = True
- while changed:
- changed = False
- for name, rules in enumerate(grammar):
- f = firsts[name]
- for rule in rules:
- if len(rule) == 0:
- changed = changed or not epsilons[name]
- epsilons[name] = True
- continue
-
- for index, symbol in enumerate(rule):
- other_firsts = firsts[symbol]
- changed = update_changed(f, other_firsts) or changed
-
- is_last = index == len(rule) - 1
- if is_last and epsilons[symbol]:
- # If this is the last symbol and the last
- # symbol can be empty then I can be empty
- # too! :P
- changed = changed or not epsilons[name]
- epsilons[name] = True
-
- if not epsilons[symbol]:
- # If we believe that there is at least one
- # terminal in the first set of this
- # nonterminal then I don't have to keep
- # looping through the symbols in this rule.
- break
-
- return FirstInfo(firsts=firsts, is_epsilon=epsilons)
-
-
-@dataclasses.dataclass(frozen=True)
-class FollowInfo:
- """A structure that tracks the follow set of a grammar. (Or, again, as the
- textbooks would have it, FOLLOW.)
-
- The follow set for a nonterminal is the set of terminals that can follow the
- nonterminal in a valid sentence. The resulting set never contains epsilon
- and is never empty, since we should always at least ground out at '$', which
- is the end-of-stream marker.
-
- In order to compute follow, we need to find every place that a given
- nonterminal appears in the grammar, and look at the first set of the symbol
- that follows it. But if the first set of the symbol that follows it includes
- epsilon, then we need to include the first of the symbol after *that*, and
- so forth, until we finally either get to the end of the rule or we find some
- symbol whose first doesn't include epsilon.
-
- If we get to the end of the rule before finding a symbol that doesn't include
- epsilon, then we also need to include the follow of the nonterminal that
- contains the rule itself. (Anything that follows this rule can follow the
- symbol we're considering.)
-
- Consider this nonsense grammar:
-
- [
- ('s', ['x', 'A']),
-
- ('x', ['y', 'B']),
- ('x', ['y', 'z']),
-
- ('y', ['x', 'C']),
-
- ('z', ['D']),
- ('z', []),
- ]
-
- In this grammar, FOLLOW['y'] is ('A', 'B', 'D'). 'B' comes from the first
- production of 'x', that's easy. 'D' comes from the second production of 'x':
- FIRST['z'] is ('D'), and so that goes into FOLLOW['y'].
-
- 'A' is the surprising one: it comes from the fact that FIRST['z'] contains
- epsilon. Since 'z' can successfully match on empty input, we need to treat
- 'y' as if it were at the end of 'x'. Anything that can follow 'x' can also
- follow 'y'. Since 'A' is in FOLLOW['x'] (from the production 's'), then 'A'
- is also in FOLLOW['y'].
-
- Note that the follow set of any nonterminal is never empty and never
- contains epsilon: they all terminate at the end-of-stream marker eventually,
- by construction. (The individual parser generators make sure to augment the
- grammar so that this is true, and that's a main reason why they do it.)
- """
-
- follows: list[set[int]]
-
- @classmethod
- def from_grammar(
- cls,
- grammar: list[list[typing.Tuple[int, ...]]],
- terminal: typing.Tuple[bool, ...],
- start_symbol: int,
- end_symbol: int,
- firsts: FirstInfo,
- ):
- follows: list[set[int]] = [set() for _ in grammar]
- follows[start_symbol].add(end_symbol)
-
- # See the comment in FirstInfo for why this is the way it is, more or
- # less. Iteration to fixed point handlily beats recursion with
- # memoization. I'm as shocked and dismayed as you as you are, but it's
- # nice to remember that fixed-point algorithms are good sometimes.
- changed = True
- while changed:
- changed = False
- for name, rules in enumerate(grammar):
- for rule in rules:
- # To do this more efficiently, we actually walk backwards
- # through the rule. As long as we've still seen something
- # with epsilon, then we need to add FOLLOW[name] to
- # FOLLOW[symbol]. As soon as we see something *without*
- # epsilon, we can stop doing that. (This is *way* more
- # efficient than trying to figure out epsilon while walking
- # forward.)
- epsilon = True
- prev_symbol = None
- for symbol in reversed(rule):
- f = follows[symbol]
- if terminal[symbol]:
- # This particular rule can't produce epsilon.
- epsilon = False
- prev_symbol = symbol
- continue
-
- # While epsilon is still set, update the follow of
- # this nonterminal with the follow of the production
- # we're processing. (This also means that the follow
- # of the last symbol in the production is the follow
- # of the entire production, as it should be.)
- if epsilon:
- changed = update_changed(f, follows[name]) or changed
-
- # If we're not at the end of the list then the follow
- # of the current symbol contains the first of the
- # next symbol.
- if prev_symbol is not None:
- changed = update_changed(f, firsts.firsts[prev_symbol]) or changed
-
- # Now if there's no epsilon in this symbol there's no
- # more epsilon in the rest of the sequence.
- if not firsts.is_epsilon[symbol]:
- epsilon = False
-
- prev_symbol = symbol
-
- return FollowInfo(follows=follows)
-
-
class GenerateSLR1(GenerateLR0):
"""Generate parse tables for SLR1 grammars.
@@ -1234,48 +440,115 @@ class GenerateSLR1(GenerateLR0):
non-terminal.
That means SLR1 parsers need to know how to generate 'follow(A)', which
- means they need to know how to generate 'first(A)'. See FirstInfo and
- FollowInfo for the details on how this is computed.
+ means they need to know how to generate 'first(A)', which is most of the
+ code in this class.
"""
+ def gen_first_symbol(self, symbol, visited):
+ """Compute the first set for a single symbol.
- _firsts: FirstInfo
- _follows: FollowInfo
+ If a symbol can be empty, then the set contains epsilon, which we
+ represent as Python's `None`.
- def __init__(self, *args, **kwargs):
- """See the constructor of GenerateLR0 for an explanation of the
- parameters to the constructor and what they mean.
+ The first set is the set of tokens that can appear as the first token
+ for a given symbol. (Obviously, if the symbol is itself a token, then
+ this is trivial.)
+
+ 'visited' is a set of already visited symbols, to stop infinite
+ recursion on left-recursive grammars. That means that sometimes this
+ function can return an empty tuple. Don't confuse that with a tuple
+ containing epsilon: that's a tuple containing `None`, not an empty
+ tuple.
"""
- super().__init__(*args, **kwargs)
+ if symbol in self.terminals:
+ return (symbol,)
+ elif symbol in visited:
+ return ()
+ else:
+ assert symbol in self.nonterminals
+ visited.add(symbol)
- # We store the firsts not because we need them here, but because LR1
- # and LALR need them.
- self._firsts = FirstInfo.from_grammar(self.grammar, self.terminal)
- self._follows = FollowInfo.from_grammar(
- self.grammar,
- self.terminal,
- self.start_symbol,
- self.end_symbol,
- self._firsts,
- )
+ # All the firsts from all the productions.
+ firsts = [
+ self.gen_first(rule[1], visited)
+ for rule in self.grammar
+ if rule[0] == symbol
+ ]
- def gen_follow(self, symbol: int) -> set[int]:
+ result = ()
+ for fs in firsts:
+ result = result + tuple(f for f in fs if f not in result)
+
+ return tuple(sorted(result))
+
+ def gen_first(self, symbols, visited=None):
+ """Compute the first set for a sequence of symbols.
+
+ The first set is the set of tokens that can appear as the first token
+ for this sequence of symbols. The interesting wrinkle in computing the
+ first set for a sequence of symbols is that we keep computing the first
+ sets so long as epsilon appears in the set. i.e., if we are computing
+ for ['A', 'B', 'C'] and the first set of 'A' contains epsilon, then the
+ first set for the *sequence* also contains the first set of ['B', 'C'],
+ since 'A' could be missing entirely.
+
+ An epsilon in the result is indicated by 'None'. The result can be
+ empty if the recursion was cut short by 'visited' (described below).
+
+ The 'visited' parameter, if not None, is a set of symbols that are
+ already in the process of being evaluated, to deal with left-recursive
+ grammars. (See gen_first_symbol for more.)
+ """
+ if len(symbols) == 0:
+ return (None,) # Epsilon.
+ else:
+ if visited is None:
+ visited = set()
+ result = self.gen_first_symbol(symbols[0], visited)
+ if None in result:
+ result = tuple(s for s in result if s is not None)
+ result = result + self.gen_first(symbols[1:], visited)
+ result = tuple(sorted(set(result)))
+ return result
+
+ def gen_follow(self, symbol, visited=None):
"""Generate the follow set for the given nonterminal.
The follow set for a nonterminal is the set of terminals that can
follow the nonterminal in a valid sentence. The resulting set never
contains epsilon and is never empty, since we should always at least
ground out at '$', which is the end-of-stream marker.
-
- See FollowInfo for more information on how this is determined.
"""
- return self._follows.follows[symbol]
+ if symbol == '__start':
+ return tuple('$')
- def gen_reduce_set(self, config: Configuration) -> typing.Iterable[int]:
+ assert symbol in self.nonterminals
+
+ # Deal with left-recursion.
+ if visited is None:
+ visited = set()
+ if symbol in visited:
+ return ()
+ visited.add(symbol)
+
+ follow = ()
+ for production in self.grammar:
+ for index, prod_symbol in enumerate(production[1]):
+ if prod_symbol != symbol:
+ continue
+
+ first = self.gen_first(production[1][index+1:])
+ follow = follow + tuple(f for f in first if f is not None)
+ if None in first:
+ follow = follow + self.gen_follow(production[0], visited)
+
+ assert None not in follow # Should always ground out at __start
+ return follow
+
+ def gen_reduce_set(self, config):
"""Return the set of symbols that indicate we should reduce the given
config.
- In an SLR1 parser, this is the follow set of the config nonterminal.
- """
+ In an SLR1 parser, this is the follow set of the config nonterminal."""
return self.gen_follow(config.name)
@@ -1290,39 +563,16 @@ class GenerateLR1(GenerateSLR1):
details. (Except for the start configuration, which has '$' as its
lookahead.)
"""
-
- def gen_first(self, symbols: typing.Iterable[int]) -> typing.Tuple[set[int], bool]:
- """Return the first set for a *sequence* of symbols.
-
- (This is more than FIRST: we need to know the first thing that can
- happen in this particular sequence right here.)
-
- Build the set by combining the first sets of the symbols from left to
- right as long as epsilon remains in the first set. If we reach the end
- and every symbol has had epsilon, then this set also has epsilon.
-
- Otherwise we can stop as soon as we get to a non-epsilon first(), and
- our result does not have epsilon.
- """
- result = set()
- for s in symbols:
- result.update(self._firsts.firsts[s])
- if not self._firsts.is_epsilon[s]:
- return (result, False)
-
- return (result, True)
-
- def gen_reduce_set(self, config: Configuration) -> typing.Iterable[int]:
+ def gen_reduce_set(self, config):
"""Return the set of symbols that indicate we should reduce the given
config.
- In an LR1 parser, this is the lookahead of the configuration.
- """
+ In an LR1 parser, this is the lookahead of the configuration."""
return config.lookahead
- @functools.cache
- def gen_closure_next(self, config: Configuration):
- """Return the next set of configurations in the closure for config.
+ def gen_closure_next(self, config):
+ """Return the next set of configurations in the closure for
+ config.
In LR1 parsers, we must compute the lookahead for the configurations
we're adding to the closure. The lookahead for the new configurations
@@ -1333,21 +583,29 @@ class GenerateLR1(GenerateSLR1):
from an upstream production in the grammar.)
(See the documentation in GenerateLR0 for more information on how
- this function fits into the whole process, specifically `gen_closure`.)
+ this function fits into the whole process.)
"""
- config_next = config.next
- if config_next is None:
+ if config.at_end:
return ()
else:
next = []
- for rule in self.grammar[config_next]:
- lookahead, epsilon = self.gen_first(config.rest)
- if epsilon:
- lookahead.update(config.lookahead)
- lookahead_tuple = tuple(sorted(lookahead))
- next.append(Configuration.from_rule(config_next, rule, lookahead=lookahead_tuple))
+ for rule in self.grammar:
+ if rule[0] != config.next:
+ continue
- return tuple(sorted(next))
+ # N.B.: We can't just append config.lookahead to config.rest
+ # and compute first(), because lookahead is a *set*. So
+ # in this case we just say if 'first' contains epsilon,
+ # then we need to remove the epsilon and union with the
+ # existing lookahead.
+ lookahead = self.gen_first(config.rest)
+ if None in lookahead:
+ lookahead = tuple(l for l in lookahead if l is not None)
+ lookahead = lookahead + config.lookahead
+ lookahead = tuple(sorted(set(lookahead)))
+ next.append(Configuration.from_rule(rule, lookahead=lookahead))
+
+ return tuple(next)
def gen_all_sets(self):
"""Generate all of the configuration sets for the grammar.
@@ -1355,12 +613,11 @@ class GenerateLR1(GenerateSLR1):
In LR1 parsers, we must remember to set the lookahead of the start
symbol to '$'.
"""
- seeds = tuple(
- Configuration.from_rule(self.start_symbol, rule, lookahead=(self.end_symbol,))
- for rule in self.grammar[self.start_symbol]
+ initial_set = self.gen_closure(
+ Configuration.from_rule(self.grammar[0], lookahead=('$',)),
+ (),
)
- initial_set = self.gen_closure(seeds)
- return self.gen_sets(initial_set)
+ return self.gen_sets(initial_set, ())
class GenerateLALR(GenerateLR1):
@@ -1374,14 +631,9 @@ class GenerateLALR(GenerateLR1):
it does lose information. The advantage is that the number of parser
states is much much smaller in LALR than in LR(1).
- If you can get away with generating LALR tables for a grammar than you
- should do it.
-
(Note that because we use immutable state everywhere this generator does
- a lot of copying and allocation. This particular generator could still
- use a bunch of improvement, probably.)
+ a lot of copying and allocation.)
"""
-
def merge_sets(self, config_set_a, config_set_b):
"""Merge the two config sets, by keeping the item cores but merging
the lookahead sets for each item.
@@ -1390,20 +642,20 @@ class GenerateLALR(GenerateLR1):
merged = []
for index, a in enumerate(config_set_a):
b = config_set_b[index]
- assert a.clear_lookahead() == b.clear_lookahead()
+ assert a.replace(lookahead=()) == b.replace(lookahead=())
new_lookahead = a.lookahead + b.lookahead
new_lookahead = tuple(sorted(set(new_lookahead)))
- merged.append(a.clear_lookahead())
+ merged.append(a.replace(lookahead=new_lookahead))
return tuple(merged)
def sets_equal(self, a, b):
- a_no_la = tuple(s.clear_lookahead() for s in a)
- b_no_la = tuple(s.clear_lookahead() for s in b)
+ a_no_la = tuple(s.replace(lookahead=()) for s in a)
+ b_no_la = tuple(s.replace(lookahead=()) for s in b)
return a_no_la == b_no_la
- def gen_sets(self, config_set) -> ConfigurationSetInfo:
+ def gen_sets(self, config_set, F):
"""Recursively generate all configuration sets starting from the
provided set, and merge them with the provided set 'F'.
@@ -1413,331 +665,28 @@ class GenerateLALR(GenerateLR1):
then instead of returning F unchanged, we merge the two equal sets
and replace the set in F, returning the modified set.
"""
- F = {}
- successors = []
- pending = [config_set]
- while len(pending) > 0:
- config_set = pending.pop()
- config_set_no_la = tuple(s.clear_lookahead() for s in config_set)
+ config_set_no_la = tuple(s.replace(lookahead=()) for s in config_set)
+ for index, existing in enumerate(F):
+ existing_no_la = tuple(s.replace(lookahead=()) for s in existing)
+ if config_set_no_la == existing_no_la:
+ merged_set = self.merge_sets(config_set, existing)
+ return F[:index] + (merged_set,) + F[index+1:]
- existing = F.get(config_set_no_la)
- if existing is not None:
- F[config_set_no_la] = self.merge_sets(config_set, existing)
- else:
- F[config_set_no_la] = config_set
- for symbol, successor in self.gen_all_successors(config_set):
- successor_no_la = tuple(s.clear_lookahead() for s in successor)
- successors.append((config_set_no_la, symbol, successor_no_la))
- pending.append(successor)
+ # No merge candidate found, proceed.
+ new_F = F + (config_set,)
+ for successor in self.gen_all_successors(config_set):
+ new_F = self.gen_sets(successor, new_F)
- # Register all the actually merged, final config sets.
- result = ConfigurationSetInfo()
- for config_set in F.values():
- result.register_config_set(config_set)
+ return new_F
- # Now record all the successors that we found. Of course, the actual
- # sets that wound up in the ConfigurationSetInfo don't match anything
- # we found during the previous phase.
- #
- # *Fortunately* we recorded the no-lookahead keys in the successors
- # so we can find the final sets, then look them up in the registered
- # sets, and actually register the successor.
- for config_set_no_la, symbol, successor_no_la in successors:
- actual_config_set = F[config_set_no_la]
- from_index = result.config_set_key[actual_config_set]
-
- actual_successor = F[successor_no_la]
- to_index = result.config_set_key[actual_successor]
-
- result.add_successor(from_index, symbol, to_index)
-
- return result
-
-
-###############################################################################
-# Sugar for constructing grammars
-###############################################################################
-# This is the "high level" API for constructing grammars.
-class Rule:
- """A token (terminal), production (nonterminal), or some other
- combination thereof. Rules are composed and then flattened into
- productions.
- """
-
- def __or__(self, other) -> "Rule":
- return AlternativeRule(self, other)
-
- def __add__(self, other) -> "Rule":
- return SequenceRule(self, other)
-
- @abc.abstractmethod
- def flatten(self) -> typing.Generator[list["str | Token"], None, None]:
- """Convert this potentially nested and branching set of rules into a
- series of nice, flat symbol lists.
-
- e.g., if this rule is (X + (A | (B + C | D))) then flattening will
- yield something like:
-
- ["X", "A"]
- ["X", "B", "C"]
- ["X", "B", "D"]
-
- Isn't that nice?
-
- Note that Token rules remain unchanged in the result: this is so we
- can better distinguish terminals from nonterminals while processing
- the grammar.
+ def find_set_index(self, sets, set):
+ """Find the specified set in the set of sets, and return the
+ index, or None if it is not found.
"""
- raise NotImplementedError()
-
-
-class Token(Rule):
- """A token, or terminal symbol in the grammar."""
-
- value: str
-
- def __init__(self, value):
- self.value = sys.intern(value)
-
- def flatten(self) -> typing.Generator[list[str], None, None]:
- # We are just ourselves when flattened.
- yield [self]
-
-
-class NonTerminal(Rule):
- """A non-terminal, or a production, in the grammar.
-
- You probably don't want to create this directly; instead you probably want
- to use the `@rule` decorator to associate this with a function in your
- grammar class.
- """
-
- def __init__(self, fn: typing.Callable[["Grammar"], Rule], name: str | None = None):
- """Create a new NonTerminal.
-
- `fn` is the function that will yield the `Rule` which is the
- right-hand-side of this production; it will be flattened with `flatten`.
- `name` is the name of the production- if unspecified (or `None`) it will
- be replaced with the `__name__` of the provided fn.
- """
- self.fn = fn
- self.name = name or fn.__name__
-
- def generate_body(self, grammar) -> list[list[str | Token]]:
- """Generate the body of the non-terminal.
-
- We do this by first calling the associated function in order to get a
- Rule, and then flattening the Rule into the associated set of
- productions.
- """
- return [rule for rule in self.fn(grammar).flatten()]
-
- def flatten(self) -> typing.Generator[list[str | Token], None, None]:
- # Although we contain multitudes, when flattened we're being asked in
- # the context of some other production. Yield ourselves, and trust that
- # in time we will be asked to generate our body.
- yield [self.name]
-
-
-class AlternativeRule(Rule):
- """A rule that matches if one or another rule matches."""
-
- def __init__(self, left: Rule, right: Rule):
- self.left = left
- self.right = right
-
- def flatten(self) -> typing.Generator[list[str], None, None]:
- # All the things from the left of the alternative, then all the things
- # from the right, never intermingled.
- yield from self.left.flatten()
- yield from self.right.flatten()
-
-
-class SequenceRule(Rule):
- """A rule that matches if a first part matches, followed by a second part.
- Two things in order.
- """
-
- def __init__(self, first: Rule, second: Rule):
- self.first = first
- self.second = second
-
- def flatten(self) -> typing.Generator[list[str], None, None]:
- # All the things in the prefix....
- for first in self.first.flatten():
- # ...potentially followed by all the things in the suffix.
- for second in self.second.flatten():
- yield first + second
-
-
-class NothingRule(Rule):
- """A rule that matches no input. Nothing, the void. Don't make a new one of
- these, you're probably better off just using the singleton `Nothing`.
- """
-
- def flatten(self) -> typing.Generator[list[str], None, None]:
- # It's quiet in here.
- yield []
-
-
-Nothing = NothingRule()
-
-
-def seq(*args: list[Rule]) -> Rule:
- """A rule that matches a sequence of rules.
-
- (A helper function that combines its arguments into nested sequences.)
- """
- result = args[0]
- for rule in args[1:]:
- result = SequenceRule(result, rule)
- return result
-
-
-@typing.overload
-def rule(name: None | str = None) -> typing.Callable[[typing.Callable], Rule]: ...
-
-
-@typing.overload
-def rule(fn: typing.Callable) -> Rule: ...
-
-
-def rule(
- name_or_fn: None | str | typing.Callable = None,
-) -> Rule | typing.Callable[[typing.Callable], Rule]:
- """The decorator that marks a method in a Grammar object as a nonterminal
- rule.
-
- As with all the best decorators, it can be called with or without arguments.
- If called with one argument, that argument is a name that overrides the name
- of the nonterminal, which defaults to the name of the function.
- """
-
- def _rule(callable):
- return NonTerminal(callable, name)
-
- if callable(name_or_fn):
- name = name_or_fn.__name__
- return _rule(name_or_fn)
- else:
- name = name_or_fn
- return _rule
-
-
-class Grammar:
- """The base class for defining a grammar.
-
- Inherit from this, and and define members for your nonterminals, and then
- use the `build_tables` method to construct the parse tables.
-
-
- Here's an example of a simple grammar:
-
- PLUS = Token('+')
- LPAREN = Token('(')
- RPAREN = Token(')')
- ID = Token('id')
-
- class SimpleGrammar(Grammar):
- @rule
- def expression(self):
- return seq(self.expression, PLUS, self.term) | self.term
-
- @rule
- def term(self):
- return seq(LPAREN, self.expression, RPAREN) | ID
-
- Not very exciting, perhaps, but it's something.
- """
-
- def __init__(self, precedence: list[typing.Tuple[Assoc, list[Token | NonTerminal]]] = None):
- if precedence is None:
- precedence = getattr(self, "precedence", [])
-
- precedence_table = {}
- for precedence, (associativity, symbols) in enumerate(precedence):
- for symbol in symbols:
- if isinstance(symbol, Token):
- key = symbol.value
- elif isinstance(symbol, NonTerminal):
- key = symbol.name
- else:
- raise ValueError(f"{symbol} must be either a Token or a NonTerminal")
-
- precedence_table[key] = (associativity, precedence + 1)
-
- self._precedence = precedence_table
-
- def generate_nonterminal_dict(self, start: str) -> dict[str, list[list[str | Token]]]:
- """Convert the rules into a dictionary of productions.
-
- Our table generators work on a very flat set of productions. This is the
- first step in flattening the productions from the members: walk the rules
- starting from the given start rule and flatten them, one by one, into a
- dictionary that maps nonterminal rule name to its associated list of
- productions.
- """
- rules = inspect.getmembers(self, lambda x: isinstance(x, NonTerminal))
- nonterminals = {rule.name: rule for _, rule in rules}
-
- grammar = {}
-
- rule = nonterminals.get(start)
- if rule is None:
- raise ValueError(f"Cannot find a rule named '{start}'")
- queue = [rule]
- while len(queue) > 0:
- rule = queue.pop()
- if rule.name in grammar:
- continue
-
- body = rule.generate_body(self)
- for clause in body:
- for symbol in clause:
- if not isinstance(symbol, Token):
- assert isinstance(symbol, str)
- nonterminal = nonterminals.get(symbol)
- if nonterminal is None:
- raise ValueError(f"While processing {rule.name}: cannot find {symbol}")
- queue.append(nonterminal)
-
- grammar[rule.name] = body
-
- return grammar
-
- def desugar(self, start: str) -> list[typing.Tuple[str, list[str]]]:
- """Convert the rules into a flat list of productions.
-
- Our table generators work from a very flat set of productions. The form
- produced by this function is one level flatter than the one produced by
- generate_nonterminal_dict- less useful to people, probably, but it is
- the input form needed by the Generator.
- """
- temp_grammar = self.generate_nonterminal_dict(start)
-
- grammar = []
- for rule_name, clauses in temp_grammar.items():
- for clause in clauses:
- new_clause = []
- for symbol in clause:
- if isinstance(symbol, Token):
- new_clause.append(symbol.value)
- else:
- new_clause.append(symbol)
-
- grammar.append((rule_name, new_clause))
-
- return grammar
-
- def build_table(self, start: str, generator=GenerateLALR):
- """Construct a parse table for this grammar, starting at the named
- nonterminal rule.
- """
- desugared = self.desugar(start)
-
- gen = generator(start, desugared, precedence=self._precedence)
- table = gen.gen_table()
- return table
+ for i, s in enumerate(sets):
+ if self.sets_equal(s, set):
+ return i
+ return None
###############################################################################
@@ -1745,182 +694,173 @@ class Grammar:
###############################################################################
def format_node(node):
"""Print out an indented concrete syntax tree, from parse()."""
- lines = ["{name}".format(name=node[0])] + [
- " " + line for child in node[1] for line in format_node(child).split("\n")
+ lines = [
+ '{name}'.format(name=node[0])
+ ] + [
+ ' ' + line
+ for child in node[1]
+ for line in format_node(child).split('\n')
]
- return "\n".join(lines)
+ return '\n'.join(lines)
def format_table(generator, table):
"""Format a parser table so pretty."""
-
def format_action(state, terminal):
- action = state.get(terminal, ("error",))
- if action[0] == "accept":
- return "accept"
- elif action[0] == "shift":
- return "s" + str(action[1])
- elif action[0] == "error":
- return ""
- elif action[0] == "reduce":
- return "r" + str(action[1])
+ action = state.get(terminal, ('error',))
+ if action[0] == 'accept':
+ return 'accept'
+ elif action[0] == 'shift':
+ return 's' + str(action[1])
+ elif action[0] == 'error':
+ return ''
+ elif action[0] == 'reduce':
+ return 'r' + str(action[1])
- terminals = list(sorted(generator.alphabet[i] for i, v in enumerate(generator.terminal) if v))
- nonterminals = list(
- sorted(generator.alphabet[i] for i, v in enumerate(generator.nonterminal) if v)
- )
header = " | {terms} | {nts}".format(
- terms=" ".join("{0: <6}".format(terminal) for terminal in terminals),
- nts=" ".join("{0: <5}".format(nt) for nt in nonterminals),
+ terms=' '.join(
+ '{0: <6}'.format(terminal)
+ for terminal in sorted(generator.terminals)
+ ),
+ nts=' '.join(
+ '{0: <5}'.format(nt)
+ for nt in sorted(generator.nonterminals)
+ ),
)
lines = [
header,
- "-" * len(header),
+ '-' * len(header),
] + [
"{index: <3} | {actions} | {gotos}".format(
index=i,
- actions=" ".join(
- "{0: <6}".format(format_action(row, terminal)) for terminal in terminals
+ actions=' '.join(
+ '{0: <6}'.format(format_action(row, terminal))
+ for terminal in sorted(generator.terminals)
+ ),
+ gotos=' '.join(
+ '{0: <5}'.format(row.get(nt, ('error', ''))[1])
+ for nt in sorted(generator.nonterminals)
),
- gotos=" ".join("{0: <5}".format(row.get(nt, ("error", ""))[1]) for nt in nonterminals),
)
for i, row in enumerate(table)
]
- return "\n".join(lines)
+ return '\n'.join(lines)
###############################################################################
# Examples
###############################################################################
-def examples():
- def dump_grammar(grammar):
- for name, symbols in grammar:
- print(f"{name} -> {symbols}")
- print()
+# OK, this is a very simple LR0 grammar.
+grammar_simple = [
+ ('E', ['E', '+', 'T']),
+ ('E', ['T']),
+ ('T', ['(', 'E', ')']),
+ ('T', ['id']),
+]
- # OK, this is a very simple LR0 grammar.
- print("grammar_simple:")
- grammar_simple = [
- ("E", ["E", "+", "T"]),
- ("E", ["T"]),
- ("T", ["(", "E", ")"]),
- ("T", ["id"]),
- ]
+gen = GenerateLR0('E', grammar_simple)
+table = gen.gen_table()
+tree = parse(table, ['id', '+', '(', 'id', ')'])
+print(format_node(tree) + "\n")
+print()
- gen = GenerateLR0("E", grammar_simple)
+# This one doesn't work with LR0, though, it has a shift/reduce conflict.
+grammar_lr0_shift_reduce = grammar_simple + [
+ ('T', ['id', '[', 'E', ']']),
+]
+try:
+ gen = GenerateLR0('E', grammar_lr0_shift_reduce)
table = gen.gen_table()
- print(format_table(gen, table))
- tree = parse(table, ["id", "+", "(", "id", ")"])
- print(format_node(tree) + "\n")
- print()
+ assert False
+except ValueError as e:
+ print(e)
+print()
- # This one doesn't work with LR0, though, it has a shift/reduce conflict.
- print("grammar_lr0_shift_reduce (LR0):")
- grammar_lr0_shift_reduce = grammar_simple + [
- ("T", ["id", "[", "E", "]"]),
- ]
- try:
- gen = GenerateLR0("E", grammar_lr0_shift_reduce)
- table = gen.gen_table()
- assert False
- except ValueError as e:
- print(e)
- print()
-
- # Nor does this: it has a reduce/reduce conflict.
- print("grammar_lr0_reduce_reduce (LR0):")
- grammar_lr0_reduce_reduce = grammar_simple + [
- ("E", ["V", "=", "E"]),
- ("V", ["id"]),
- ]
- try:
- gen = GenerateLR0("E", grammar_lr0_reduce_reduce)
- table = gen.gen_table()
- assert False
- except ValueError as e:
- print(e)
- print()
-
- # Nullable symbols just don't work with constructs like this, because you can't
- # look ahead to figure out if you should reduce an empty 'F' or not.
- print("grammar_nullable (LR0):")
- grammar_nullable = [
- ("E", ["F", "boop"]),
- ("F", ["beep"]),
- ("F", []),
- ]
- try:
- gen = GenerateLR0("E", grammar_nullable)
- table = gen.gen_table()
- assert False
- except ValueError as e:
- print(e)
- print()
-
- print("grammar_lr0_shift_reduce (SLR1):")
- dump_grammar(grammar_lr0_shift_reduce)
- gen = GenerateSLR1("E", grammar_lr0_shift_reduce)
- print(f"Follow('E'): {str([gen.alphabet[f] for f in gen.gen_follow(gen.symbol_key['E'])])}")
+# Nor does this: it has a reduce/reduce conflict.
+grammar_lr0_reduce_reduce = grammar_simple + [
+ ('E', ['V', '=', 'E']),
+ ('V', ['id']),
+]
+try:
+ gen = GenerateLR0('E', grammar_lr0_reduce_reduce)
table = gen.gen_table()
- print(format_table(gen, table))
- tree = parse(table, ["id", "+", "(", "id", "[", "id", "]", ")"], trace=True)
- print(format_node(tree) + "\n")
- print()
+ assert False
+except ValueError as e:
+ print(e)
+print()
- # SLR1 can't handle this.
- print("grammar_aho_ullman_1 (SLR1):")
- grammar_aho_ullman_1 = [
- ("S", ["L", "=", "R"]),
- ("S", ["R"]),
- ("L", ["*", "R"]),
- ("L", ["id"]),
- ("R", ["L"]),
- ]
- try:
- gen = GenerateSLR1("S", grammar_aho_ullman_1)
- table = gen.gen_table()
- assert False
- except ValueError as e:
- print(e)
- print()
-
- # Here's an example with a full LR1 grammar, though.
- print("grammar_aho_ullman_2 (LR1):")
- grammar_aho_ullman_2 = [
- ("S", ["X", "X"]),
- ("X", ["a", "X"]),
- ("X", ["b"]),
- ]
- gen = GenerateLR1("S", grammar_aho_ullman_2)
+# Nullable symbols just don't work with constructs like this, because you can't
+# look ahead to figure out if you should reduce an empty 'F' or not.
+grammar_nullable = [
+ ('E', ['F', 'boop']),
+ ('F', ['beep']),
+ ('F', []),
+]
+try:
+ gen = GenerateLR0('E', grammar_nullable)
table = gen.gen_table()
- print(format_table(gen, table))
- parse(table, ["b", "a", "a", "b"], trace=True)
- print()
+ assert False
+except ValueError as e:
+ print(e)
- # What happens if we do LALR to it?
- print("grammar_aho_ullman_2 (LALR):")
- gen = GenerateLALR("S", grammar_aho_ullman_2)
+gen = GenerateSLR1('E', grammar_lr0_shift_reduce)
+print("First: {first}".format(first=str(gen.gen_first(['E']))))
+print("Follow: {follow}".format(follow=str(gen.gen_follow('E'))))
+table = gen.gen_table()
+print(format_table(gen, table))
+tree = parse(table, ['id', '+', '(', 'id', '[', 'id', ']', ')'])
+print(format_node(tree) + "\n")
+print()
+
+# SLR1 can't handle this.
+grammar_aho_ullman_1 = [
+ ('S', ['L', '=', 'R']),
+ ('S', ['R']),
+ ('L', ['*', 'R']),
+ ('L', ['id']),
+ ('R', ['L']),
+]
+try:
+ gen = GenerateSLR1('S', grammar_aho_ullman_1)
table = gen.gen_table()
- print(format_table(gen, table))
- print()
+ assert False
+except ValueError as e:
+ print(e)
+print()
- # A fun LALAR grammar.
- print("grammar_lalr:")
- grammar_lalr = [
- ("S", ["V", "E"]),
- ("E", ["F"]),
- ("E", ["E", "+", "F"]),
- ("F", ["V"]),
- ("F", ["int"]),
- ("F", ["(", "E", ")"]),
- ("V", ["id"]),
- ]
- gen = GenerateLALR("S", grammar_lalr)
- table = gen.gen_table()
- print(format_table(gen, table))
- print()
+# Here's an example with a full LR1 grammar, though.
+grammar_aho_ullman_2 = [
+ ('S', ['X', 'X']),
+ ('X', ['a', 'X']),
+ ('X', ['b']),
+]
+gen = GenerateLR1('S', grammar_aho_ullman_2)
+table = gen.gen_table()
+print(format_table(gen, table))
+parse(table, ['b', 'a', 'a', 'b'], trace=True)
+print()
+# What happens if we do LALR to it?
+gen = GenerateLALR('S', grammar_aho_ullman_2)
+table = gen.gen_table()
+print(format_table(gen, table))
+print()
-if __name__ == "__main__":
- examples()
+# A fun LALR grammar.
+grammar_lalr = [
+ ('S', ['V', 'E']),
+
+ ('E', ['F']),
+ ('E', ['E', '+', 'F']),
+
+ ('F', ['V']),
+ ('F', ['int']),
+ ('F', ['(', 'E', ')']),
+
+ ('V', ['id']),
+]
+gen = GenerateLALR('S', grammar_lalr)
+table = gen.gen_table()
+print(format_table(gen, table))
+print()
diff --git a/pyproject.toml b/pyproject.toml
deleted file mode 100644
index 7cf2884..0000000
--- a/pyproject.toml
+++ /dev/null
@@ -1,13 +0,0 @@
-[project]
-name = "lrparsers"
-descrption = "a small LR parser generator library"
-authors = [
- {name = "John Doty", email = "john@d0ty.me"},
-]
-classifiers = [
- "Private :: Do Not Upload", # Probably.
- "License :: OSI Approved :: MIT License",
-]
-
-[tool.black]
-line-length=100
\ No newline at end of file