Vendor things

This commit is contained in:
John Doty 2024-03-08 11:03:01 -08:00
parent 5deceec006
commit 977e3c17e5
19434 changed files with 10682014 additions and 0 deletions

View file

@ -0,0 +1,177 @@
# Miscellaneous regex match tests. Each [[tests]] table pairs a regex
# `pattern` with an `input`; `matches` lists the expected [start, end)
# offsets (end-exclusive: "0.1" below spans [0, 3]).
# NOTE(review): only the leftmost match appears to be recorded (crazy-misc2
# expects just [0, 3] for "0.1.2") -- confirm against the test harness.

# crazy-misc*: real-world style patterns (decimal numbers, e-mail
# addresses, dates).
[[tests]]
name = "crazy-misc1"
pattern = '[-+]?[0-9]*\.?[0-9]+'
input = "0.1"
matches = [[0, 3]]
[[tests]]
name = "crazy-misc2"
pattern = '[-+]?[0-9]*\.?[0-9]+'
input = "0.1.2"
matches = [[0, 3]]
[[tests]]
name = "crazy-misc3"
pattern = '[-+]?[0-9]*\.?[0-9]+'
input = "a1.2"
matches = [[1, 4]]
[[tests]]
options = ["case-insensitive"]
name = "crazy-misc4"
pattern = '[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}'
input = "mine is jam.slam@gmail.com "
matches = [[8, 26]]
[[tests]]
options = ["case-insensitive"]
name = "crazy-misc5"
pattern = '[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}'
input = "mine is jam.slam@gmail "
matches = []
[[tests]]
name = "crazy-misc6"
pattern = '''[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?'''
input = "mine is jam.slam@gmail.com "
matches = [[8, 26]]
[[tests]]
name = "crazy-misc7"
pattern = '(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])'
input = "1900-01-01"
matches = [[0, 10]]
[[tests]]
name = "crazy-misc8"
pattern = '(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])'
input = "1900-00-01"
matches = []
[[tests]]
name = "crazy-misc9"
pattern = '(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])'
input = "1900-13-01"
matches = []
# crazy-negclass*: negated character classes ([^...]). In every case the
# expected match is the first input character outside the excluded set.
[[tests]]
name = "crazy-negclass1"
pattern = "[^ac]"
input = "acx"
matches = [[2, 3]]
[[tests]]
name = "crazy-negclass2"
pattern = "[^a,]"
input = "a,x"
matches = [[2, 3]]
[[tests]]
name = "crazy-negclass3"
pattern = '[^a\s]'
input = "a x"
matches = [[2, 3]]
[[tests]]
name = "crazy-negclass4"
pattern = "[^,]"
input = ",,x"
matches = [[2, 3]]
[[tests]]
name = "crazy-negclass5"
pattern = '[^\s]'
input = " a"
matches = [[1, 2]]
[[tests]]
name = "crazy-negclass6"
pattern = '[^,\s]'
input = ", a"
matches = [[2, 3]]
[[tests]]
name = "crazy-negclass7"
pattern = '[^\s,]'
input = " ,a"
matches = [[2, 3]]
[[tests]]
name = "crazy-negclass8"
pattern = "[^[:alpha:]Z]"
input = "A1"
matches = [[1, 2]]
# crazy-empty-repeat*: a repetition applied to a group that can itself match
# the empty string, in lazy (1-6) and greedy (7-12) forms. Every case expects
# the match to run from the start of "a=b" through the '='.
[[tests]]
name = "crazy-empty-repeat1"
pattern = "((.*)*?)="
input = "a=b"
matches = [[0, 2]]
[[tests]]
name = "crazy-empty-repeat2"
pattern = "((.?)*?)="
input = "a=b"
matches = [[0, 2]]
[[tests]]
name = "crazy-empty-repeat3"
pattern = "((.*)+?)="
input = "a=b"
matches = [[0, 2]]
[[tests]]
name = "crazy-empty-repeat4"
pattern = "((.?)+?)="
input = "a=b"
matches = [[0, 2]]
[[tests]]
name = "crazy-empty-repeat5"
pattern = "((.*){1,}?)="
input = "a=b"
matches = [[0, 2]]
[[tests]]
name = "crazy-empty-repeat6"
pattern = "((.*){1,2}?)="
input = "a=b"
matches = [[0, 2]]
[[tests]]
name = "crazy-empty-repeat7"
pattern = "((.*)*)="
input = "a=b"
matches = [[0, 2]]
[[tests]]
name = "crazy-empty-repeat8"
pattern = "((.?)*)="
input = "a=b"
matches = [[0, 2]]
[[tests]]
name = "crazy-empty-repeat9"
pattern = "((.*)+)="
input = "a=b"
matches = [[0, 2]]
[[tests]]
name = "crazy-empty-repeat10"
pattern = "((.?)+)="
input = "a=b"
matches = [[0, 2]]
[[tests]]
name = "crazy-empty-repeat11"
pattern = "((.*){1,})="
input = "a=b"
matches = [[0, 2]]
[[tests]]
name = "crazy-empty-repeat12"
pattern = "((.*){1,2})="
input = "a=b"
matches = [[0, 2]]

View file

@ -0,0 +1,59 @@
# Inline flag tests. As exercised by the expectations in this file:
#   (?i)  case-insensitive matching
#   (?s)  lets `.` match "\n"
#   (?U)  swaps greedy and lazy repetition
# `(?-x)` switches a flag off for the remainder of the pattern, and
# `(?-x:...)` switches it off only inside that group.
[[tests]]
name = "flags1"
pattern = "(?i)abc"
input = "ABC"
matches = [[0, 3]]
[[tests]]
name = "flags2"
pattern = "(?i)a(?-i)bc"
input = "Abc"
matches = [[0, 3]]
[[tests]]
name = "flags3"
pattern = "(?i)a(?-i)bc"
input = "ABC"
matches = []
[[tests]]
name = "flags4"
pattern = "(?is)a."
input = "A\n"
matches = [[0, 2]]
[[tests]]
name = "flags5"
pattern = "(?is)a.(?-is)a."
input = "A\nab"
matches = [[0, 4]]
[[tests]]
name = "flags6"
pattern = "(?is)a.(?-is)a."
input = "A\na\n"
matches = []
[[tests]]
name = "flags7"
pattern = "(?is)a.(?-is:a.)?"
input = "A\na\n"
matches = [[0, 2]]
# With (?U) set, a bare `a+` is lazy and stops after one 'a'.
[[tests]]
name = "flags8"
pattern = "(?U)a+"
input = "aa"
matches = [[0, 1]]
[[tests]]
name = "flags9"
pattern = "(?U)a+?"
input = "aa"
matches = [[0, 2]]
[[tests]]
name = "flags10"
pattern = "(?U)(?-U)a+"
input = "aa"
matches = [[0, 2]]

View file

@ -0,0 +1,19 @@
The following license covers testregex.c and all associated test data.
Permission is hereby granted, free of charge, to any person obtaining a
copy of THIS SOFTWARE FILE (the "Software"), to deal in the Software
without restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, and/or sell copies of the
Software, and to permit persons to whom the Software is furnished to do
so, subject to the following disclaimer:
THIS SOFTWARE IS PROVIDED BY AT&T ``AS IS'' AND ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL AT&T BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View file

@ -0,0 +1,23 @@
Test data was taken from the Go distribution, which was in turn taken from the
testregex test suite:
http://www2.research.att.com/~astopen/testregex/testregex.html
Unfortunately, the above link is now dead, but the test data lives on.
The LICENSE in this directory corresponds to the LICENSE that the data was
originally released under.
The tests themselves were modified for RE2/Go. A couple were modified further
by me (Andrew Gallant) (only in repetition.dat) so that RE2/Go would pass them.
(Yes, it seems like RE2/Go includes failing test cases.) This may or may not
have been a bad idea, but I think being consistent with an established Regex
library is worth something.
After some number of years, these tests were transformed into a TOML format
using the fowler-to-toml script in this directory, e.g.,
./fowler-to-toml basic.dat > basic.toml
which brings them into a sensible structured format in which other tests can
be written.

View file

@ -0,0 +1,221 @@
NOTE all standard compliant implementations should pass these : 2002-05-31
BE abracadabra$ abracadabracadabra (7,18)
BE a...b abababbb (2,7)
BE XXXXXX ..XXXXXX (2,8)
E \) () (1,2)
BE a] a]a (0,2)
B } } (0,1)
E \} } (0,1)
BE \] ] (0,1)
B ] ] (0,1)
E ] ] (0,1)
B { { (0,1)
B } } (0,1)
BE ^a ax (0,1)
BE \^a a^a (1,3)
BE a\^ a^ (0,2)
BE a$ aa (1,2)
BE a\$ a$ (0,2)
BE ^$ NULL (0,0)
E $^ NULL (0,0)
E a($) aa (1,2)(2,2)
E a*(^a) aa (0,1)(0,1)
E (..)*(...)* a (0,0)
E (..)*(...)* abcd (0,4)(2,4)
E (ab|a)(bc|c) abc (0,3)(0,2)(2,3)
E (ab)c|abc abc (0,3)(0,2)
E a{0}b ab (1,2)
E (a*)(b?)(b+)b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7)
E (a*)(b{0,1})(b{1,})b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7)
E a{9876543210} NULL BADBR
E ((a|a)|a) a (0,1)(0,1)(0,1)
E (a*)(a|aa) aaaa (0,4)(0,3)(3,4)
E a*(a.|aa) aaaa (0,4)(2,4)
E a(b)|c(d)|a(e)f aef (0,3)(?,?)(?,?)(1,2)
E (a|b)?.* b (0,1)(0,1)
E (a|b)c|a(b|c) ac (0,2)(0,1)
E (a|b)c|a(b|c) ab (0,2)(?,?)(1,2)
E (a|b)*c|(a|ab)*c abc (0,3)(1,2)
E (a|b)*c|(a|ab)*c xc (1,2)
E (.a|.b).*|.*(.a|.b) xa (0,2)(0,2)
E a?(ab|ba)ab abab (0,4)(0,2)
E a?(ac{0}b|ba)ab abab (0,4)(0,2)
E ab|abab abbabab (0,2)
E aba|bab|bba baaabbbaba (5,8)
E aba|bab baaabbbaba (6,9)
E (aa|aaa)*|(a|aaaaa) aa (0,2)(0,2)
E (a.|.a.)*|(a|.a...) aa (0,2)(0,2)
E ab|a xabc (1,3)
E ab|a xxabc (2,4)
Ei (Ab|cD)* aBcD (0,4)(2,4)
BE [^-] --a (2,3)
BE [a-]* --a (0,3)
BE [a-m-]* --amoma-- (0,4)
E :::1:::0:|:::1:1:0: :::0:::1:::1:::0: (8,17)
E :::1:::0:|:::1:1:1: :::0:::1:::1:::0: (8,17)
{E [[:upper:]] A (0,1) [[<element>]] not supported
E [[:lower:]]+ `az{ (1,3)
E [[:upper:]]+ @AZ[ (1,3)
# No collation in Go
#BE [[-]] [[-]] (2,4)
#BE [[.NIL.]] NULL ECOLLATE
#BE [[=aleph=]] NULL ECOLLATE
}
BE$ \n \n (0,1)
BEn$ \n \n (0,1)
BE$ [^a] \n (0,1)
BE$ \na \na (0,2)
E (a)(b)(c) abc (0,3)(0,1)(1,2)(2,3)
BE xxx xxx (0,3)
E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 6, (0,6)
E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) 2/7 (0,3)
E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 1,Feb 6 (5,11)
E3 ((((((((((((((((((((((((((((((x)))))))))))))))))))))))))))))) x (0,1)(0,1)(0,1)
E3 ((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))* xx (0,2)(1,2)(1,2)
E a?(ab|ba)* ababababababababababababababababababababababababababababababababababababababababa (0,81)(79,81)
E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabbbbaa (18,25)
E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabaa (18,22)
E aaac|aabc|abac|abbc|baac|babc|bbac|bbbc baaabbbabac (7,11)
BE$ .* \x01\x7f (0,2)
E aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa (53,57)
L aaaa\nbbbb\ncccc\nddddd\neeeeee\nfffffff\ngggg\nhhhh\niiiii\njjjjj\nkkkkk\nllll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa NOMATCH
E a*a*a*a*a*b aaaaaaaaab (0,10)
BE ^ NULL (0,0)
BE $ NULL (0,0)
BE ^$ NULL (0,0)
BE ^a$ a (0,1)
BE abc abc (0,3)
BE abc xabcy (1,4)
BE abc ababc (2,5)
BE ab*c abc (0,3)
BE ab*bc abc (0,3)
BE ab*bc abbc (0,4)
BE ab*bc abbbbc (0,6)
E ab+bc abbc (0,4)
E ab+bc abbbbc (0,6)
E ab?bc abbc (0,4)
E ab?bc abc (0,3)
E ab?c abc (0,3)
BE ^abc$ abc (0,3)
BE ^abc abcc (0,3)
BE abc$ aabc (1,4)
BE ^ abc (0,0)
BE $ abc (3,3)
BE a.c abc (0,3)
BE a.c axc (0,3)
BE a.*c axyzc (0,5)
BE a[bc]d abd (0,3)
BE a[b-d]e ace (0,3)
BE a[b-d] aac (1,3)
BE a[-b] a- (0,2)
BE a[b-] a- (0,2)
BE a] a] (0,2)
BE a[]]b a]b (0,3)
BE a[^bc]d aed (0,3)
BE a[^-b]c adc (0,3)
BE a[^]b]c adc (0,3)
E ab|cd abc (0,2)
E ab|cd abcd (0,2)
E a\(b a(b (0,3)
E a\(*b ab (0,2)
E a\(*b a((b (0,4)
E ((a)) abc (0,1)(0,1)(0,1)
E (a)b(c) abc (0,3)(0,1)(2,3)
E a+b+c aabbabc (4,7)
E a* aaa (0,3)
#E (a*)* - (0,0)(0,0)
E (a*)* - (0,0)(?,?) RE2/Go
E (a*)+ - (0,0)(0,0)
#E (a*|b)* - (0,0)(0,0)
E (a*|b)* - (0,0)(?,?) RE2/Go
E (a+|b)* ab (0,2)(1,2)
E (a+|b)+ ab (0,2)(1,2)
E (a+|b)? ab (0,1)(0,1)
BE [^ab]* cde (0,3)
#E (^)* - (0,0)(0,0)
E (^)* - (0,0)(?,?) RE2/Go
BE a* NULL (0,0)
E ([abc])*d abbbcd (0,6)(4,5)
E ([abc])*bcd abcd (0,4)(0,1)
E a|b|c|d|e e (0,1)
E (a|b|c|d|e)f ef (0,2)(0,1)
#E ((a*|b))* - (0,0)(0,0)(0,0)
E ((a*|b))* - (0,0)(?,?)(?,?) RE2/Go
BE abcd*efg abcdefg (0,7)
BE ab* xabyabbbz (1,3)
BE ab* xayabbbz (1,2)
E (ab|cd)e abcde (2,5)(2,4)
BE [abhgefdc]ij hij (0,3)
E (a|b)c*d abcd (1,4)(1,2)
E (ab|ab*)bc abc (0,3)(0,1)
E a([bc]*)c* abc (0,3)(1,3)
E a([bc]*)(c*d) abcd (0,4)(1,3)(3,4)
E a([bc]+)(c*d) abcd (0,4)(1,3)(3,4)
E a([bc]*)(c+d) abcd (0,4)(1,2)(2,4)
E a[bcd]*dcdcde adcdcde (0,7)
E (ab|a)b*c abc (0,3)(0,2)
E ((a)(b)c)(d) abcd (0,4)(0,3)(0,1)(1,2)(3,4)
BE [A-Za-z_][A-Za-z0-9_]* alpha (0,5)
E ^a(bc+|b[eh])g|.h$ abh (1,3)
E (bc+d$|ef*g.|h?i(j|k)) effgz (0,5)(0,5)
E (bc+d$|ef*g.|h?i(j|k)) ij (0,2)(0,2)(1,2)
E (bc+d$|ef*g.|h?i(j|k)) reffgz (1,6)(1,6)
E (((((((((a))))))))) a (0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)
BE multiple words multiple words yeah (0,14)
E (.*)c(.*) abcde (0,5)(0,2)(3,5)
BE abcd abcd (0,4)
E a(bc)d abcd (0,4)(1,3)
E a[-]?c ac (0,3)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Qaddafi (0,15)(?,?)(10,12)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mo'ammar Gadhafi (0,16)(?,?)(11,13)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Kaddafi (0,15)(?,?)(10,12)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Qadhafi (0,15)(?,?)(10,12)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Gadafi (0,14)(?,?)(10,11)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar Qadafi (0,15)(?,?)(11,12)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moamar Gaddafi (0,14)(?,?)(9,11)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar Qadhdhafi (0,18)(?,?)(13,15)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Khaddafi (0,16)(?,?)(11,13)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghaddafy (0,16)(?,?)(11,13)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghadafi (0,15)(?,?)(11,12)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghaddafi (0,16)(?,?)(11,13)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muamar Kaddafi (0,14)(?,?)(9,11)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Quathafi (0,16)(?,?)(11,13)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Gheddafi (0,16)(?,?)(11,13)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar Khadafy (0,15)(?,?)(11,12)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar Qudhafi (0,15)(?,?)(10,12)
E a+(b|c)*d+ aabcdd (0,6)(3,4)
E ^.+$ vivi (0,4)
E ^(.+)$ vivi (0,4)(0,4)
E ^([^!.]+).att.com!(.+)$ gryphon.att.com!eby (0,19)(0,7)(16,19)
E ^([^!]+!)?([^!]+)$ bas (0,3)(?,?)(0,3)
E ^([^!]+!)?([^!]+)$ bar!bas (0,7)(0,4)(4,7)
E ^([^!]+!)?([^!]+)$ foo!bas (0,7)(0,4)(4,7)
E ^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(4,8)(8,11)
E ((foo)|(bar))!bas bar!bas (0,7)(0,3)(?,?)(0,3)
E ((foo)|(bar))!bas foo!bar!bas (4,11)(4,7)(?,?)(4,7)
E ((foo)|(bar))!bas foo!bas (0,7)(0,3)(0,3)
E ((foo)|bar)!bas bar!bas (0,7)(0,3)
E ((foo)|bar)!bas foo!bar!bas (4,11)(4,7)
E ((foo)|bar)!bas foo!bas (0,7)(0,3)(0,3)
E (foo|(bar))!bas bar!bas (0,7)(0,3)(0,3)
E (foo|(bar))!bas foo!bar!bas (4,11)(4,7)(4,7)
E (foo|(bar))!bas foo!bas (0,7)(0,3)
E (foo|bar)!bas bar!bas (0,7)(0,3)
E (foo|bar)!bas foo!bar!bas (4,11)(4,7)
E (foo|bar)!bas foo!bas (0,7)(0,3)
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11)
E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bas (0,3)(?,?)(0,3)
E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bar!bas (0,7)(0,4)(4,7)
E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(?,?)(?,?)(4,8)(8,11)
E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bas (0,7)(0,4)(4,7)
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bas (0,3)(0,3)(?,?)(0,3)
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bar!bas (0,7)(0,7)(0,4)(4,7)
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11)
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bas (0,7)(0,7)(0,4)(4,7)
E .*(/XXX).* /XXX (0,4)(0,4)
E .*(\\XXX).* \XXX (0,4)(0,4)
E \\XXX \XXX (0,4)
E .*(/000).* /000 (0,4)(0,4)
E .*(\\000).* \000 (0,4)(0,4)
E \\000 \000 (0,4)

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,76 @@
#!/usr/bin/env python
from __future__ import absolute_import, division, print_function
import argparse
import os.path as path
def read_tests(f):
    """Convert an AT&T/Fowler ``.dat`` regex test file into TOML-ready tests.

    Each usable line is tab-separated into 4 or 5 non-empty fields:
    ``flags``, ``pattern``, ``input``, ``expected`` and an optional trailing
    note. A line is skipped when its field count is wrong, when its flags do
    not contain ``E``, when it is commented out with a leading ``#``, or when
    ``expected`` is neither ``NOMATCH`` nor a list of ``(start,end)`` groups
    (for example an error code).

    Args:
        f: Path to the ``.dat`` file. Its base name (extension removed)
            prefixes every generated test name.

    Returns:
        A list of dicts with the keys ``name``, ``options``, ``pattern``,
        ``input`` and ``matches``. Every value is a string already formatted
        as the right-hand side of a TOML ``key = value`` line.

    Raises:
        ValueError: If the first expected group is not an integer pair.
    """
    basename, _ = path.splitext(path.basename(f))
    tests = []
    prev_pattern = None

    # Use a context manager so the file handle is closed deterministically,
    # including when a malformed line raises part-way through the file.
    with open(f) as datfile:
        for lineno, line in enumerate(datfile, 1):
            fields = list(filter(None, map(str.strip, line.split('\t'))))
            if not (4 <= len(fields) <= 5) \
               or 'E' not in fields[0] or fields[0][0] == '#':
                continue

            terse_opts, pat, text, sgroups = fields[0:4]
            if sgroups == 'NOMATCH':
                groups = []
            elif ',' in sgroups:
                # Only the first group (the overall match span) is kept;
                # capture groups such as "(?,?)" are deliberately ignored.
                overall = sgroups.split(')(')[0].strip('()')
                start, end = map(str.strip, overall.split(','))
                groups = [[int(start), int(end)]]
            else:
                # This skips tests that should result in an error.
                # There aren't many, so I think we can just capture those
                # manually. Possibly fix this in future.
                continue

            if text == "NULL":
                text = ""
            if pat == 'SAME':
                # 'SAME' reuses the pattern emitted for the previous test.
                pat = prev_pattern
            if '$' in terse_opts:
                # '$' marks pattern and input as containing C-style escapes;
                # expand them before re-escaping for output.
                pat = pat.encode('utf-8').decode('unicode_escape')
                text = text.encode('utf-8').decode('unicode_escape')
            text = text.encode('unicode_escape').decode('utf-8')

            # Every generated test is tagged 'escaped'.
            opts = ['escaped']
            if 'i' in terse_opts:
                opts.append('case-insensitive')

            pat = pat.encode('unicode_escape').decode('utf-8')
            # unicode_escape doubles literal backslashes; undo that so regex
            # escapes such as \) survive unchanged.
            pat = pat.replace('\\\\', '\\')
            tests.append({
                'name': '"%s%d"' % (basename, lineno),
                'options': repr(opts),
                'pattern': "'''%s'''" % pat,
                'input': "'''%s'''" % text,
                'matches': str(groups),
            })
            prev_pattern = pat
    return tests
if __name__ == '__main__':
    # Command-line entry point: convert one .dat file and write the
    # resulting [[tests]] tables to standard output.
    cli = argparse.ArgumentParser(
        description='Generate match tests from an AT&T POSIX test file.')
    cli.add_argument('datfile', help='A dat AT&T POSIX test file.')
    datfile = cli.parse_args().datfile

    for test in read_tests(datfile):
        print('[[tests]]')
        for key, value in test.items():
            # Values are already formatted as TOML by read_tests.
            print('%s = %s' % (key, value))
        print('')

View file

@ -0,0 +1,79 @@
NOTE null subexpression matches : 2002-06-06
E (a*)* a (0,1)(0,1)
#E SAME x (0,0)(0,0)
E SAME x (0,0)(?,?) RE2/Go
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaax (0,6)(0,6)
E (a*)+ a (0,1)(0,1)
E SAME x (0,0)(0,0)
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaax (0,6)(0,6)
E (a+)* a (0,1)(0,1)
E SAME x (0,0)
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaax (0,6)(0,6)
E (a+)+ a (0,1)(0,1)
E SAME x NOMATCH
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaax (0,6)(0,6)
E ([a]*)* a (0,1)(0,1)
#E SAME x (0,0)(0,0)
E SAME x (0,0)(?,?) RE2/Go
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaax (0,6)(0,6)
E ([a]*)+ a (0,1)(0,1)
E SAME x (0,0)(0,0)
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaax (0,6)(0,6)
E ([^b]*)* a (0,1)(0,1)
#E SAME b (0,0)(0,0)
E SAME b (0,0)(?,?) RE2/Go
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaab (0,6)(0,6)
E ([ab]*)* a (0,1)(0,1)
E SAME aaaaaa (0,6)(0,6)
E SAME ababab (0,6)(0,6)
E SAME bababa (0,6)(0,6)
E SAME b (0,1)(0,1)
E SAME bbbbbb (0,6)(0,6)
E SAME aaaabcde (0,5)(0,5)
E ([^a]*)* b (0,1)(0,1)
E SAME bbbbbb (0,6)(0,6)
#E SAME aaaaaa (0,0)(0,0)
E SAME aaaaaa (0,0)(?,?) RE2/Go
E ([^ab]*)* ccccxx (0,6)(0,6)
#E SAME ababab (0,0)(0,0)
E SAME ababab (0,0)(?,?) RE2/Go
E ((z)+|a)* zabcde (0,2)(1,2)
#{E a+? aaaaaa (0,1) no *? +? minimal match ops
#E (a) aaa (0,1)(0,1)
#E (a*?) aaa (0,0)(0,0)
#E (a)*? aaa (0,0)
#E (a*?)*? aaa (0,0)
#}
B \(a*\)*\(x\) x (0,1)(0,0)(0,1)
B \(a*\)*\(x\) ax (0,2)(0,1)(1,2)
B \(a*\)*\(x\) axa (0,2)(0,1)(1,2)
B \(a*\)*\(x\)\(\1\) x (0,1)(0,0)(0,1)(1,1)
B \(a*\)*\(x\)\(\1\) ax (0,2)(1,1)(1,2)(2,2)
B \(a*\)*\(x\)\(\1\) axa (0,3)(0,1)(1,2)(2,3)
B \(a*\)*\(x\)\(\1\)\(x\) axax (0,4)(0,1)(1,2)(2,3)(3,4)
B \(a*\)*\(x\)\(\1\)\(x\) axxa (0,3)(1,1)(1,2)(2,2)(2,3)
#E (a*)*(x) x (0,1)(0,0)(0,1)
E (a*)*(x) x (0,1)(?,?)(0,1) RE2/Go
E (a*)*(x) ax (0,2)(0,1)(1,2)
E (a*)*(x) axa (0,2)(0,1)(1,2)
E (a*)+(x) x (0,1)(0,0)(0,1)
E (a*)+(x) ax (0,2)(0,1)(1,2)
E (a*)+(x) axa (0,2)(0,1)(1,2)
E (a*){2}(x) x (0,1)(0,0)(0,1)
E (a*){2}(x) ax (0,2)(1,1)(1,2)
E (a*){2}(x) axa (0,2)(1,1)(1,2)

View file

@ -0,0 +1,350 @@
# NOTE(review): this file appears to be generated from the AT&T/Fowler
# null-subexpression .dat data by the conversion script in this directory --
# confirm, and prefer regenerating over hand edits. The numeric suffix in
# each test name appears to be the source line number in the .dat file, and
# `matches` holds only the overall match span (capture groups are dropped).
[[tests]]
name = "nullsubexpr3"
options = ['escaped']
pattern = '''(a*)*'''
input = '''a'''
matches = [[0, 1]]
[[tests]]
name = "nullsubexpr5"
options = ['escaped']
pattern = '''(a*)*'''
input = '''x'''
matches = [[0, 0]]
[[tests]]
name = "nullsubexpr6"
options = ['escaped']
pattern = '''(a*)*'''
input = '''aaaaaa'''
matches = [[0, 6]]
[[tests]]
name = "nullsubexpr7"
options = ['escaped']
pattern = '''(a*)*'''
input = '''aaaaaax'''
matches = [[0, 6]]
[[tests]]
name = "nullsubexpr8"
options = ['escaped']
pattern = '''(a*)+'''
input = '''a'''
matches = [[0, 1]]
[[tests]]
name = "nullsubexpr9"
options = ['escaped']
pattern = '''(a*)+'''
input = '''x'''
matches = [[0, 0]]
[[tests]]
name = "nullsubexpr10"
options = ['escaped']
pattern = '''(a*)+'''
input = '''aaaaaa'''
matches = [[0, 6]]
[[tests]]
name = "nullsubexpr11"
options = ['escaped']
pattern = '''(a*)+'''
input = '''aaaaaax'''
matches = [[0, 6]]
[[tests]]
name = "nullsubexpr12"
options = ['escaped']
pattern = '''(a+)*'''
input = '''a'''
matches = [[0, 1]]
[[tests]]
name = "nullsubexpr13"
options = ['escaped']
pattern = '''(a+)*'''
input = '''x'''
matches = [[0, 0]]
[[tests]]
name = "nullsubexpr14"
options = ['escaped']
pattern = '''(a+)*'''
input = '''aaaaaa'''
matches = [[0, 6]]
[[tests]]
name = "nullsubexpr15"
options = ['escaped']
pattern = '''(a+)*'''
input = '''aaaaaax'''
matches = [[0, 6]]
[[tests]]
name = "nullsubexpr16"
options = ['escaped']
pattern = '''(a+)+'''
input = '''a'''
matches = [[0, 1]]
[[tests]]
name = "nullsubexpr17"
options = ['escaped']
pattern = '''(a+)+'''
input = '''x'''
matches = []
[[tests]]
name = "nullsubexpr18"
options = ['escaped']
pattern = '''(a+)+'''
input = '''aaaaaa'''
matches = [[0, 6]]
[[tests]]
name = "nullsubexpr19"
options = ['escaped']
pattern = '''(a+)+'''
input = '''aaaaaax'''
matches = [[0, 6]]
[[tests]]
name = "nullsubexpr21"
options = ['escaped']
pattern = '''([a]*)*'''
input = '''a'''
matches = [[0, 1]]
[[tests]]
name = "nullsubexpr23"
options = ['escaped']
pattern = '''([a]*)*'''
input = '''x'''
matches = [[0, 0]]
[[tests]]
name = "nullsubexpr24"
options = ['escaped']
pattern = '''([a]*)*'''
input = '''aaaaaa'''
matches = [[0, 6]]
[[tests]]
name = "nullsubexpr25"
options = ['escaped']
pattern = '''([a]*)*'''
input = '''aaaaaax'''
matches = [[0, 6]]
[[tests]]
name = "nullsubexpr26"
options = ['escaped']
pattern = '''([a]*)+'''
input = '''a'''
matches = [[0, 1]]
[[tests]]
name = "nullsubexpr27"
options = ['escaped']
pattern = '''([a]*)+'''
input = '''x'''
matches = [[0, 0]]
[[tests]]
name = "nullsubexpr28"
options = ['escaped']
pattern = '''([a]*)+'''
input = '''aaaaaa'''
matches = [[0, 6]]
[[tests]]
name = "nullsubexpr29"
options = ['escaped']
pattern = '''([a]*)+'''
input = '''aaaaaax'''
matches = [[0, 6]]
[[tests]]
name = "nullsubexpr30"
options = ['escaped']
pattern = '''([^b]*)*'''
input = '''a'''
matches = [[0, 1]]
[[tests]]
name = "nullsubexpr32"
options = ['escaped']
pattern = '''([^b]*)*'''
input = '''b'''
matches = [[0, 0]]
[[tests]]
name = "nullsubexpr33"
options = ['escaped']
pattern = '''([^b]*)*'''
input = '''aaaaaa'''
matches = [[0, 6]]
[[tests]]
name = "nullsubexpr34"
options = ['escaped']
pattern = '''([^b]*)*'''
input = '''aaaaaab'''
matches = [[0, 6]]
[[tests]]
name = "nullsubexpr35"
options = ['escaped']
pattern = '''([ab]*)*'''
input = '''a'''
matches = [[0, 1]]
[[tests]]
name = "nullsubexpr36"
options = ['escaped']
pattern = '''([ab]*)*'''
input = '''aaaaaa'''
matches = [[0, 6]]
[[tests]]
name = "nullsubexpr37"
options = ['escaped']
pattern = '''([ab]*)*'''
input = '''ababab'''
matches = [[0, 6]]
[[tests]]
name = "nullsubexpr38"
options = ['escaped']
pattern = '''([ab]*)*'''
input = '''bababa'''
matches = [[0, 6]]
[[tests]]
name = "nullsubexpr39"
options = ['escaped']
pattern = '''([ab]*)*'''
input = '''b'''
matches = [[0, 1]]
[[tests]]
name = "nullsubexpr40"
options = ['escaped']
pattern = '''([ab]*)*'''
input = '''bbbbbb'''
matches = [[0, 6]]
[[tests]]
name = "nullsubexpr41"
options = ['escaped']
pattern = '''([ab]*)*'''
input = '''aaaabcde'''
matches = [[0, 5]]
[[tests]]
name = "nullsubexpr42"
options = ['escaped']
pattern = '''([^a]*)*'''
input = '''b'''
matches = [[0, 1]]
[[tests]]
name = "nullsubexpr43"
options = ['escaped']
pattern = '''([^a]*)*'''
input = '''bbbbbb'''
matches = [[0, 6]]
[[tests]]
name = "nullsubexpr45"
options = ['escaped']
pattern = '''([^a]*)*'''
input = '''aaaaaa'''
matches = [[0, 0]]
[[tests]]
name = "nullsubexpr46"
options = ['escaped']
pattern = '''([^ab]*)*'''
input = '''ccccxx'''
matches = [[0, 6]]
[[tests]]
name = "nullsubexpr48"
options = ['escaped']
pattern = '''([^ab]*)*'''
input = '''ababab'''
matches = [[0, 0]]
[[tests]]
name = "nullsubexpr50"
options = ['escaped']
pattern = '''((z)+|a)*'''
input = '''zabcde'''
matches = [[0, 2]]
[[tests]]
name = "nullsubexpr69"
options = ['escaped']
pattern = '''(a*)*(x)'''
input = '''x'''
matches = [[0, 1]]
[[tests]]
name = "nullsubexpr70"
options = ['escaped']
pattern = '''(a*)*(x)'''
input = '''ax'''
matches = [[0, 2]]
[[tests]]
name = "nullsubexpr71"
options = ['escaped']
pattern = '''(a*)*(x)'''
input = '''axa'''
matches = [[0, 2]]
[[tests]]
name = "nullsubexpr73"
options = ['escaped']
pattern = '''(a*)+(x)'''
input = '''x'''
matches = [[0, 1]]
[[tests]]
name = "nullsubexpr74"
options = ['escaped']
pattern = '''(a*)+(x)'''
input = '''ax'''
matches = [[0, 2]]
[[tests]]
name = "nullsubexpr75"
options = ['escaped']
pattern = '''(a*)+(x)'''
input = '''axa'''
matches = [[0, 2]]
[[tests]]
name = "nullsubexpr77"
options = ['escaped']
pattern = '''(a*){2}(x)'''
input = '''x'''
matches = [[0, 1]]
[[tests]]
name = "nullsubexpr78"
options = ['escaped']
pattern = '''(a*){2}(x)'''
input = '''ax'''
matches = [[0, 2]]
[[tests]]
name = "nullsubexpr79"
options = ['escaped']
pattern = '''(a*){2}(x)'''
input = '''axa'''
matches = [[0, 2]]

View file

@ -0,0 +1,85 @@
NOTE implicit vs. explicit repetitions : 2009-02-02
# Glenn Fowler <gsf@research.att.com>
# conforming matches (column 4) must match one of the following BREs
# NOMATCH
# (0,.)\((\(.\),\(.\))(?,?)(\2,\3)\)*
# (0,.)\((\(.\),\(.\))(\2,\3)(?,?)\)*
# i.e., each 3-tuple has two identical elements and one (?,?)
NOTE additional repetition tests graciously provided by Chris Kuklewicz www.haskell.org 2009-02-02
:HA#100:E X(.?){0,}Y X1234567Y (0,9)(7,8)
:HA#101:E X(.?){1,}Y X1234567Y (0,9)(7,8)
:HA#102:E X(.?){2,}Y X1234567Y (0,9)(7,8)
:HA#103:E X(.?){3,}Y X1234567Y (0,9)(7,8)
:HA#104:E X(.?){4,}Y X1234567Y (0,9)(7,8)
:HA#105:E X(.?){5,}Y X1234567Y (0,9)(7,8)
:HA#106:E X(.?){6,}Y X1234567Y (0,9)(7,8)
:HA#107:E X(.?){7,}Y X1234567Y (0,9)(7,8)
:HA#108:E X(.?){8,}Y X1234567Y (0,9)(8,8)
#:HA#110:E X(.?){0,8}Y X1234567Y (0,9)(7,8)
:HA#110:E X(.?){0,8}Y X1234567Y (0,9)(8,8) RE2/Go
#:HA#111:E X(.?){1,8}Y X1234567Y (0,9)(7,8)
:HA#111:E X(.?){1,8}Y X1234567Y (0,9)(8,8) RE2/Go
#:HA#112:E X(.?){2,8}Y X1234567Y (0,9)(7,8)
:HA#112:E X(.?){2,8}Y X1234567Y (0,9)(8,8) RE2/Go
#:HA#113:E X(.?){3,8}Y X1234567Y (0,9)(7,8)
:HA#113:E X(.?){3,8}Y X1234567Y (0,9)(8,8) RE2/Go
#:HA#114:E X(.?){4,8}Y X1234567Y (0,9)(7,8)
:HA#114:E X(.?){4,8}Y X1234567Y (0,9)(8,8) RE2/Go
#:HA#115:E X(.?){5,8}Y X1234567Y (0,9)(7,8)
:HA#115:E X(.?){5,8}Y X1234567Y (0,9)(8,8) RE2/Go
#:HA#116:E X(.?){6,8}Y X1234567Y (0,9)(7,8)
:HA#116:E X(.?){6,8}Y X1234567Y (0,9)(8,8) RE2/Go
#:HA#117:E X(.?){7,8}Y X1234567Y (0,9)(7,8)
:HA#117:E X(.?){7,8}Y X1234567Y (0,9)(8,8) RE2/Go
:HA#118:E X(.?){8,8}Y X1234567Y (0,9)(8,8)
# These test a fixed bug in my regex-tdfa that did not keep the expanded
# form properly grouped, so right association did the wrong thing with
# these ambiguous patterns (crafted just to test my code when I became
# suspicious of my implementation). The first subexpression should use
# "ab" then "a" then "bcd".
# OS X / FreeBSD / NetBSD badly fail many of these, with impossible
# results like (0,6)(4,5)(6,6).
:HA#260:E (a|ab|c|bcd){0,}(d*) ababcd (0,1)(0,1)(1,1)
:HA#261:E (a|ab|c|bcd){1,}(d*) ababcd (0,1)(0,1)(1,1)
:HA#262:E (a|ab|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#263:E (a|ab|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#264:E (a|ab|c|bcd){4,}(d*) ababcd NOMATCH
:HA#265:E (a|ab|c|bcd){0,10}(d*) ababcd (0,1)(0,1)(1,1)
:HA#266:E (a|ab|c|bcd){1,10}(d*) ababcd (0,1)(0,1)(1,1)
:HA#267:E (a|ab|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#268:E (a|ab|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#269:E (a|ab|c|bcd){4,10}(d*) ababcd NOMATCH
:HA#270:E (a|ab|c|bcd)*(d*) ababcd (0,1)(0,1)(1,1)
:HA#271:E (a|ab|c|bcd)+(d*) ababcd (0,1)(0,1)(1,1)
# The above worked on Linux/GLIBC but the following often fail.
# They also trip up OS X / FreeBSD / NetBSD:
#:HA#280:E (ab|a|c|bcd){0,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#280:E (ab|a|c|bcd){0,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
#:HA#281:E (ab|a|c|bcd){1,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#281:E (ab|a|c|bcd){1,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
#:HA#282:E (ab|a|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#282:E (ab|a|c|bcd){2,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
#:HA#283:E (ab|a|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#283:E (ab|a|c|bcd){3,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
:HA#284:E (ab|a|c|bcd){4,}(d*) ababcd NOMATCH
#:HA#285:E (ab|a|c|bcd){0,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#285:E (ab|a|c|bcd){0,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
#:HA#286:E (ab|a|c|bcd){1,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#286:E (ab|a|c|bcd){1,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
#:HA#287:E (ab|a|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#287:E (ab|a|c|bcd){2,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
#:HA#288:E (ab|a|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#288:E (ab|a|c|bcd){3,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
:HA#289:E (ab|a|c|bcd){4,10}(d*) ababcd NOMATCH
#:HA#290:E (ab|a|c|bcd)*(d*) ababcd (0,6)(3,6)(6,6)
:HA#290:E (ab|a|c|bcd)*(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
#:HA#291:E (ab|a|c|bcd)+(d*) ababcd (0,6)(3,6)(6,6)
:HA#291:E (ab|a|c|bcd)+(d*) ababcd (0,6)(4,5)(5,6) RE2/Go

View file

@ -0,0 +1,294 @@
# NOTE(review): this file appears to be generated from the AT&T/Fowler
# long-repetition .dat data by the conversion script in this directory --
# confirm, and prefer regenerating over hand edits. The numeric suffix in
# each test name appears to be the source line number in the .dat file, and
# `matches` holds only the overall match span (capture groups are dropped).
[[tests]]
name = "repetition-long12"
options = ['escaped']
pattern = '''X(.?){0,}Y'''
input = '''X1234567Y'''
matches = [[0, 9]]
[[tests]]
name = "repetition-long13"
options = ['escaped']
pattern = '''X(.?){1,}Y'''
input = '''X1234567Y'''
matches = [[0, 9]]
[[tests]]
name = "repetition-long14"
options = ['escaped']
pattern = '''X(.?){2,}Y'''
input = '''X1234567Y'''
matches = [[0, 9]]
[[tests]]
name = "repetition-long15"
options = ['escaped']
pattern = '''X(.?){3,}Y'''
input = '''X1234567Y'''
matches = [[0, 9]]
[[tests]]
name = "repetition-long16"
options = ['escaped']
pattern = '''X(.?){4,}Y'''
input = '''X1234567Y'''
matches = [[0, 9]]
[[tests]]
name = "repetition-long17"
options = ['escaped']
pattern = '''X(.?){5,}Y'''
input = '''X1234567Y'''
matches = [[0, 9]]
[[tests]]
name = "repetition-long18"
options = ['escaped']
pattern = '''X(.?){6,}Y'''
input = '''X1234567Y'''
matches = [[0, 9]]
[[tests]]
name = "repetition-long19"
options = ['escaped']
pattern = '''X(.?){7,}Y'''
input = '''X1234567Y'''
matches = [[0, 9]]
[[tests]]
name = "repetition-long20"
options = ['escaped']
pattern = '''X(.?){8,}Y'''
input = '''X1234567Y'''
matches = [[0, 9]]
[[tests]]
name = "repetition-long22"
options = ['escaped']
pattern = '''X(.?){0,8}Y'''
input = '''X1234567Y'''
matches = [[0, 9]]
[[tests]]
name = "repetition-long24"
options = ['escaped']
pattern = '''X(.?){1,8}Y'''
input = '''X1234567Y'''
matches = [[0, 9]]
[[tests]]
name = "repetition-long26"
options = ['escaped']
pattern = '''X(.?){2,8}Y'''
input = '''X1234567Y'''
matches = [[0, 9]]
[[tests]]
name = "repetition-long28"
options = ['escaped']
pattern = '''X(.?){3,8}Y'''
input = '''X1234567Y'''
matches = [[0, 9]]
[[tests]]
name = "repetition-long30"
options = ['escaped']
pattern = '''X(.?){4,8}Y'''
input = '''X1234567Y'''
matches = [[0, 9]]
[[tests]]
name = "repetition-long32"
options = ['escaped']
pattern = '''X(.?){5,8}Y'''
input = '''X1234567Y'''
matches = [[0, 9]]
[[tests]]
name = "repetition-long34"
options = ['escaped']
pattern = '''X(.?){6,8}Y'''
input = '''X1234567Y'''
matches = [[0, 9]]
[[tests]]
name = "repetition-long36"
options = ['escaped']
pattern = '''X(.?){7,8}Y'''
input = '''X1234567Y'''
matches = [[0, 9]]
[[tests]]
name = "repetition-long37"
options = ['escaped']
pattern = '''X(.?){8,8}Y'''
input = '''X1234567Y'''
matches = [[0, 9]]
[[tests]]
name = "repetition-long48"
options = ['escaped']
pattern = '''(a|ab|c|bcd){0,}(d*)'''
input = '''ababcd'''
matches = [[0, 1]]
[[tests]]
name = "repetition-long49"
options = ['escaped']
pattern = '''(a|ab|c|bcd){1,}(d*)'''
input = '''ababcd'''
matches = [[0, 1]]
[[tests]]
name = "repetition-long50"
options = ['escaped']
pattern = '''(a|ab|c|bcd){2,}(d*)'''
input = '''ababcd'''
matches = [[0, 6]]
[[tests]]
name = "repetition-long51"
options = ['escaped']
pattern = '''(a|ab|c|bcd){3,}(d*)'''
input = '''ababcd'''
matches = [[0, 6]]
[[tests]]
name = "repetition-long52"
options = ['escaped']
pattern = '''(a|ab|c|bcd){4,}(d*)'''
input = '''ababcd'''
matches = []
[[tests]]
name = "repetition-long53"
options = ['escaped']
pattern = '''(a|ab|c|bcd){0,10}(d*)'''
input = '''ababcd'''
matches = [[0, 1]]
[[tests]]
name = "repetition-long54"
options = ['escaped']
pattern = '''(a|ab|c|bcd){1,10}(d*)'''
input = '''ababcd'''
matches = [[0, 1]]
[[tests]]
name = "repetition-long55"
options = ['escaped']
pattern = '''(a|ab|c|bcd){2,10}(d*)'''
input = '''ababcd'''
matches = [[0, 6]]
[[tests]]
name = "repetition-long56"
options = ['escaped']
pattern = '''(a|ab|c|bcd){3,10}(d*)'''
input = '''ababcd'''
matches = [[0, 6]]
[[tests]]
name = "repetition-long57"
options = ['escaped']
pattern = '''(a|ab|c|bcd){4,10}(d*)'''
input = '''ababcd'''
matches = []
[[tests]]
name = "repetition-long58"
options = ['escaped']
pattern = '''(a|ab|c|bcd)*(d*)'''
input = '''ababcd'''
matches = [[0, 1]]
[[tests]]
name = "repetition-long59"
options = ['escaped']
pattern = '''(a|ab|c|bcd)+(d*)'''
input = '''ababcd'''
matches = [[0, 1]]
[[tests]]
name = "repetition-long65"
options = ['escaped']
pattern = '''(ab|a|c|bcd){0,}(d*)'''
input = '''ababcd'''
matches = [[0, 6]]
[[tests]]
name = "repetition-long67"
options = ['escaped']
pattern = '''(ab|a|c|bcd){1,}(d*)'''
input = '''ababcd'''
matches = [[0, 6]]
[[tests]]
name = "repetition-long69"
options = ['escaped']
pattern = '''(ab|a|c|bcd){2,}(d*)'''
input = '''ababcd'''
matches = [[0, 6]]
[[tests]]
name = "repetition-long71"
options = ['escaped']
pattern = '''(ab|a|c|bcd){3,}(d*)'''
input = '''ababcd'''
matches = [[0, 6]]
[[tests]]
name = "repetition-long72"
options = ['escaped']
pattern = '''(ab|a|c|bcd){4,}(d*)'''
input = '''ababcd'''
matches = []
[[tests]]
name = "repetition-long74"
options = ['escaped']
pattern = '''(ab|a|c|bcd){0,10}(d*)'''
input = '''ababcd'''
matches = [[0, 6]]
[[tests]]
name = "repetition-long76"
options = ['escaped']
pattern = '''(ab|a|c|bcd){1,10}(d*)'''
input = '''ababcd'''
matches = [[0, 6]]
[[tests]]
name = "repetition-long78"
options = ['escaped']
pattern = '''(ab|a|c|bcd){2,10}(d*)'''
input = '''ababcd'''
matches = [[0, 6]]
[[tests]]
name = "repetition-long80"
options = ['escaped']
pattern = '''(ab|a|c|bcd){3,10}(d*)'''
input = '''ababcd'''
matches = [[0, 6]]
[[tests]]
name = "repetition-long81"
options = ['escaped']
pattern = '''(ab|a|c|bcd){4,10}(d*)'''
input = '''ababcd'''
matches = []
[[tests]]
name = "repetition-long83"
options = ['escaped']
pattern = '''(ab|a|c|bcd)*(d*)'''
input = '''ababcd'''
matches = [[0, 6]]
[[tests]]
name = "repetition-long85"
options = ['escaped']
pattern = '''(ab|a|c|bcd)+(d*)'''
input = '''ababcd'''
matches = [[0, 6]]

View file

@ -0,0 +1,83 @@
NOTE implicit vs. explicit repetitions : 2009-02-02
# Glenn Fowler <gsf@research.att.com>
# conforming matches (column 4) must match one of the following BREs
# NOMATCH
# (0,.)\((\(.\),\(.\))(?,?)(\2,\3)\)*
# (0,.)\((\(.\),\(.\))(\2,\3)(?,?)\)*
# i.e., each 3-tuple has two identical elements and one (?,?)
E ((..)|(.)) NULL NOMATCH
E ((..)|(.))((..)|(.)) NULL NOMATCH
E ((..)|(.))((..)|(.))((..)|(.)) NULL NOMATCH
E ((..)|(.)){1} NULL NOMATCH
E ((..)|(.)){2} NULL NOMATCH
E ((..)|(.)){3} NULL NOMATCH
E ((..)|(.))* NULL (0,0)
E ((..)|(.)) a (0,1)(0,1)(?,?)(0,1)
E ((..)|(.))((..)|(.)) a NOMATCH
E ((..)|(.))((..)|(.))((..)|(.)) a NOMATCH
E ((..)|(.)){1} a (0,1)(0,1)(?,?)(0,1)
E ((..)|(.)){2} a NOMATCH
E ((..)|(.)){3} a NOMATCH
E ((..)|(.))* a (0,1)(0,1)(?,?)(0,1)
E ((..)|(.)) aa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.))((..)|(.)) aa (0,2)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)
E ((..)|(.))((..)|(.))((..)|(.)) aa NOMATCH
E ((..)|(.)){1} aa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.)){2} aa (0,2)(1,2)(?,?)(1,2)
E ((..)|(.)){3} aa NOMATCH
E ((..)|(.))* aa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.)) aaa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.))((..)|(.)) aaa (0,3)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)
E ((..)|(.))((..)|(.))((..)|(.)) aaa (0,3)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)(2,3)(?,?)(2,3)
E ((..)|(.)){1} aaa (0,2)(0,2)(0,2)(?,?)
#E ((..)|(.)){2} aaa (0,3)(2,3)(?,?)(2,3)
E ((..)|(.)){2} aaa (0,3)(2,3)(0,2)(2,3) RE2/Go
E ((..)|(.)){3} aaa (0,3)(2,3)(?,?)(2,3)
#E ((..)|(.))* aaa (0,3)(2,3)(?,?)(2,3)
E ((..)|(.))* aaa (0,3)(2,3)(0,2)(2,3) RE2/Go
E ((..)|(.)) aaaa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
E ((..)|(.))((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)(3,4)(?,?)(3,4)
E ((..)|(.)){1} aaaa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.)){2} aaaa (0,4)(2,4)(2,4)(?,?)
#E ((..)|(.)){3} aaaa (0,4)(3,4)(?,?)(3,4)
E ((..)|(.)){3} aaaa (0,4)(3,4)(0,2)(3,4) RE2/Go
E ((..)|(.))* aaaa (0,4)(2,4)(2,4)(?,?)
E ((..)|(.)) aaaaa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.))((..)|(.)) aaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
E ((..)|(.))((..)|(.))((..)|(.)) aaaaa (0,5)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,5)(?,?)(4,5)
E ((..)|(.)){1} aaaaa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.)){2} aaaaa (0,4)(2,4)(2,4)(?,?)
#E ((..)|(.)){3} aaaaa (0,5)(4,5)(?,?)(4,5)
E ((..)|(.)){3} aaaaa (0,5)(4,5)(2,4)(4,5) RE2/Go
#E ((..)|(.))* aaaaa (0,5)(4,5)(?,?)(4,5)
E ((..)|(.))* aaaaa (0,5)(4,5)(2,4)(4,5) RE2/Go
E ((..)|(.)) aaaaaa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.))((..)|(.)) aaaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
E ((..)|(.))((..)|(.))((..)|(.)) aaaaaa (0,6)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,6)(4,6)(?,?)
E ((..)|(.)){1} aaaaaa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.)){2} aaaaaa (0,4)(2,4)(2,4)(?,?)
E ((..)|(.)){3} aaaaaa (0,6)(4,6)(4,6)(?,?)
E ((..)|(.))* aaaaaa (0,6)(4,6)(4,6)(?,?)

View file

@ -0,0 +1,343 @@
[[tests]]
name = "repetition10"
options = ['escaped']
pattern = '''((..)|(.))'''
input = ''''''
matches = []
[[tests]]
name = "repetition11"
options = ['escaped']
pattern = '''((..)|(.))((..)|(.))'''
input = ''''''
matches = []
[[tests]]
name = "repetition12"
options = ['escaped']
pattern = '''((..)|(.))((..)|(.))((..)|(.))'''
input = ''''''
matches = []
[[tests]]
name = "repetition14"
options = ['escaped']
pattern = '''((..)|(.)){1}'''
input = ''''''
matches = []
[[tests]]
name = "repetition15"
options = ['escaped']
pattern = '''((..)|(.)){2}'''
input = ''''''
matches = []
[[tests]]
name = "repetition16"
options = ['escaped']
pattern = '''((..)|(.)){3}'''
input = ''''''
matches = []
[[tests]]
name = "repetition18"
options = ['escaped']
pattern = '''((..)|(.))*'''
input = ''''''
matches = [[0, 0]]
[[tests]]
name = "repetition20"
options = ['escaped']
pattern = '''((..)|(.))'''
input = '''a'''
matches = [[0, 1]]
[[tests]]
name = "repetition21"
options = ['escaped']
pattern = '''((..)|(.))((..)|(.))'''
input = '''a'''
matches = []
[[tests]]
name = "repetition22"
options = ['escaped']
pattern = '''((..)|(.))((..)|(.))((..)|(.))'''
input = '''a'''
matches = []
[[tests]]
name = "repetition24"
options = ['escaped']
pattern = '''((..)|(.)){1}'''
input = '''a'''
matches = [[0, 1]]
[[tests]]
name = "repetition25"
options = ['escaped']
pattern = '''((..)|(.)){2}'''
input = '''a'''
matches = []
[[tests]]
name = "repetition26"
options = ['escaped']
pattern = '''((..)|(.)){3}'''
input = '''a'''
matches = []
[[tests]]
name = "repetition28"
options = ['escaped']
pattern = '''((..)|(.))*'''
input = '''a'''
matches = [[0, 1]]
[[tests]]
name = "repetition30"
options = ['escaped']
pattern = '''((..)|(.))'''
input = '''aa'''
matches = [[0, 2]]
[[tests]]
name = "repetition31"
options = ['escaped']
pattern = '''((..)|(.))((..)|(.))'''
input = '''aa'''
matches = [[0, 2]]
[[tests]]
name = "repetition32"
options = ['escaped']
pattern = '''((..)|(.))((..)|(.))((..)|(.))'''
input = '''aa'''
matches = []
[[tests]]
name = "repetition34"
options = ['escaped']
pattern = '''((..)|(.)){1}'''
input = '''aa'''
matches = [[0, 2]]
[[tests]]
name = "repetition35"
options = ['escaped']
pattern = '''((..)|(.)){2}'''
input = '''aa'''
matches = [[0, 2]]
[[tests]]
name = "repetition36"
options = ['escaped']
pattern = '''((..)|(.)){3}'''
input = '''aa'''
matches = []
[[tests]]
name = "repetition38"
options = ['escaped']
pattern = '''((..)|(.))*'''
input = '''aa'''
matches = [[0, 2]]
[[tests]]
name = "repetition40"
options = ['escaped']
pattern = '''((..)|(.))'''
input = '''aaa'''
matches = [[0, 2]]
[[tests]]
name = "repetition41"
options = ['escaped']
pattern = '''((..)|(.))((..)|(.))'''
input = '''aaa'''
matches = [[0, 3]]
[[tests]]
name = "repetition42"
options = ['escaped']
pattern = '''((..)|(.))((..)|(.))((..)|(.))'''
input = '''aaa'''
matches = [[0, 3]]
[[tests]]
name = "repetition44"
options = ['escaped']
pattern = '''((..)|(.)){1}'''
input = '''aaa'''
matches = [[0, 2]]
[[tests]]
name = "repetition46"
options = ['escaped']
pattern = '''((..)|(.)){2}'''
input = '''aaa'''
matches = [[0, 3]]
[[tests]]
name = "repetition47"
options = ['escaped']
pattern = '''((..)|(.)){3}'''
input = '''aaa'''
matches = [[0, 3]]
[[tests]]
name = "repetition50"
options = ['escaped']
pattern = '''((..)|(.))*'''
input = '''aaa'''
matches = [[0, 3]]
[[tests]]
name = "repetition52"
options = ['escaped']
pattern = '''((..)|(.))'''
input = '''aaaa'''
matches = [[0, 2]]
[[tests]]
name = "repetition53"
options = ['escaped']
pattern = '''((..)|(.))((..)|(.))'''
input = '''aaaa'''
matches = [[0, 4]]
[[tests]]
name = "repetition54"
options = ['escaped']
pattern = '''((..)|(.))((..)|(.))((..)|(.))'''
input = '''aaaa'''
matches = [[0, 4]]
[[tests]]
name = "repetition56"
options = ['escaped']
pattern = '''((..)|(.)){1}'''
input = '''aaaa'''
matches = [[0, 2]]
[[tests]]
name = "repetition57"
options = ['escaped']
pattern = '''((..)|(.)){2}'''
input = '''aaaa'''
matches = [[0, 4]]
[[tests]]
name = "repetition59"
options = ['escaped']
pattern = '''((..)|(.)){3}'''
input = '''aaaa'''
matches = [[0, 4]]
[[tests]]
name = "repetition61"
options = ['escaped']
pattern = '''((..)|(.))*'''
input = '''aaaa'''
matches = [[0, 4]]
[[tests]]
name = "repetition63"
options = ['escaped']
pattern = '''((..)|(.))'''
input = '''aaaaa'''
matches = [[0, 2]]
[[tests]]
name = "repetition64"
options = ['escaped']
pattern = '''((..)|(.))((..)|(.))'''
input = '''aaaaa'''
matches = [[0, 4]]
[[tests]]
name = "repetition65"
options = ['escaped']
pattern = '''((..)|(.))((..)|(.))((..)|(.))'''
input = '''aaaaa'''
matches = [[0, 5]]
[[tests]]
name = "repetition67"
options = ['escaped']
pattern = '''((..)|(.)){1}'''
input = '''aaaaa'''
matches = [[0, 2]]
[[tests]]
name = "repetition68"
options = ['escaped']
pattern = '''((..)|(.)){2}'''
input = '''aaaaa'''
matches = [[0, 4]]
[[tests]]
name = "repetition70"
options = ['escaped']
pattern = '''((..)|(.)){3}'''
input = '''aaaaa'''
matches = [[0, 5]]
[[tests]]
name = "repetition73"
options = ['escaped']
pattern = '''((..)|(.))*'''
input = '''aaaaa'''
matches = [[0, 5]]
[[tests]]
name = "repetition75"
options = ['escaped']
pattern = '''((..)|(.))'''
input = '''aaaaaa'''
matches = [[0, 2]]
[[tests]]
name = "repetition76"
options = ['escaped']
pattern = '''((..)|(.))((..)|(.))'''
input = '''aaaaaa'''
matches = [[0, 4]]
[[tests]]
name = "repetition77"
options = ['escaped']
pattern = '''((..)|(.))((..)|(.))((..)|(.))'''
input = '''aaaaaa'''
matches = [[0, 6]]
[[tests]]
name = "repetition79"
options = ['escaped']
pattern = '''((..)|(.)){1}'''
input = '''aaaaaa'''
matches = [[0, 2]]
[[tests]]
name = "repetition80"
options = ['escaped']
pattern = '''((..)|(.)){2}'''
input = '''aaaaaa'''
matches = [[0, 4]]
[[tests]]
name = "repetition81"
options = ['escaped']
pattern = '''((..)|(.)){3}'''
input = '''aaaaaa'''
matches = [[0, 6]]
[[tests]]
name = "repetition83"
options = ['escaped']
pattern = '''((..)|(.))*'''
input = '''aaaaaa'''
matches = [[0, 6]]

View file

@ -0,0 +1,92 @@
[[tests]]
name = "iter1"
pattern = "a"
input = "aaa"
matches = [[0, 1], [1, 2], [2, 3]]
[[tests]]
name = "iter2"
pattern = "a"
input = "aba"
matches = [[0, 1], [2, 3]]
[[tests]]
name = "iter-empty1"
pattern = ''
input = ''
matches = [[0, 0]]
[[tests]]
name = "iter-empty2"
pattern = ''
input = 'abc'
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[tests]]
name = "iter-empty3"
pattern = '()'
input = 'abc'
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[tests]]
name = "iter-empty4"
pattern = '()*'
input = 'abc'
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[tests]]
name = "iter-empty5"
pattern = '()+'
input = 'abc'
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[tests]]
name = "iter-empty6"
pattern = '()?'
input = 'abc'
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[tests]]
name = "iter-empty7"
pattern = '()()'
input = 'abc'
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[tests]]
name = "iter-empty8"
pattern = '()+|z'
input = 'abc'
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[tests]]
name = "iter-empty9"
pattern = 'z|()+'
input = 'abc'
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[tests]]
# The empty-matching branch `()+` is listed first, so it is expected to win at
# every offset; the literal `b` branch is never reported even at offset 1.
name = "iter-empty10"
pattern = '()+|b'
input = 'abc'
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[tests]]
# Branch order reversed: `b` is now preferred at offset 1 and yields [1, 2].
# Note that no empty match is listed at offset 2, i.e. directly at the end of
# the preceding non-empty match.
name = "iter-empty11"
pattern = 'b|()+'
input = 'abc'
matches = [[0, 0], [1, 2], [3, 3]]
[[tests]]
# Anchored search over a single-character input: the one match at offset 0.
options = ["anchored"]
name = "iter-anchored1"
pattern = "a"
input = "a"
matches = [[0, 1]]
[[tests]]
# With "anchored" only the match starting at offset 0 is expected; the second
# "a" at [1, 2] is not reported, unlike the unanchored iter1 case where every
# occurrence is listed.
options = ["anchored"]
name = "iter-anchored2"
pattern = "a"
input = "aa"
matches = [[0, 1]]

View file

@ -0,0 +1,138 @@
[[tests]]
name = "invalid-utf8-literal1"
options = ["escaped", "invalid-utf8", "no-unicode"]
pattern = '\xFF'
input = '\xFF'
matches = [[0, 1]]
[[tests]]
# The input is written with \xHH escapes (presumably decoded to raw bytes by
# the harness because of the "escaped" option): two 2-byte sequences
# (CE 93, CE 94) followed by a lone FF byte, 5 bytes in total. The pattern
# switches Unicode mode off with `(?-u)` for the second group, and the
# expected match covers all 5 bytes including the trailing FF.
name = "no-unicode-mixed"
options = ["escaped", "invalid-utf8"]
pattern = '(.+)(?-u)(.+)'
input = '\xCE\x93\xCE\x94\xFF'
matches = [[0, 5]]
[[tests]]
name = "no-unicode-case1"
options = ["case-insensitive", "no-unicode"]
pattern = "a"
input = "A"
matches = [[0, 1]]
[[tests]]
name = "no-unicode-case2"
options = ["case-insensitive", "no-unicode"]
pattern = "[a-z]+"
input = "AaAaA"
matches = [[0, 5]]
[[tests]]
# \u212A is KELVIN SIGN, 3 bytes in UTF-8, so the input is 2 + 3 + 2 = 7
# bytes. With Unicode enabled, case-insensitive `[a-z]` is expected to match
# it as well, giving a single match over the whole input.
name = "no-unicode-case3"
options = ["case-insensitive"]
pattern = "[a-z]+"
input = "aA\u212AaA"
matches = [[0, 7]]
[[tests]]
# Same input with "no-unicode": the match is expected to stop at byte 2,
# immediately before the 3-byte \u212A.
name = "no-unicode-case4"
options = ["case-insensitive", "no-unicode"]
pattern = "[a-z]+"
input = "aA\u212AaA"
matches = [[0, 2]]
[[tests]]
# The input is a single 2-byte character; with Unicode enabled `[^a]` is
# expected to consume the whole character: [0, 2].
name = "no-unicode-negate1"
options = []
pattern = "[^a]"
input = "δ"
matches = [[0, 2]]
[[tests]]
# With "no-unicode" the negated class is expected to match only the first
# byte of the same 2-byte character: [0, 1]. "invalid-utf8" is also set,
# presumably because that match splits a UTF-8 sequence.
name = "no-unicode-negate2"
options = ["no-unicode", "invalid-utf8"]
pattern = "[^a]"
input = "δ"
matches = [[0, 1]]
[[tests]]
name = "no-unicode-dotstar-prefix1"
options = ["escaped", "no-unicode", "invalid-utf8"]
pattern = "a"
input = '\xFFa'
matches = [[1, 2]]
[[tests]]
name = "no-unicode-dotstar-prefix2"
options = ["escaped", "invalid-utf8"]
pattern = "a"
input = '\xFFa'
matches = [[1, 2]]
[[tests]]
name = "no-unicode-null-bytes1"
options = ["escaped", "no-unicode", "invalid-utf8"]
pattern = '[^\x00]+\x00'
input = 'foo\x00'
matches = [[0, 4]]
[[tests]]
name = "no-unicode1"
options = ["no-unicode"]
pattern = '\w+'
input = "aδ"
matches = [[0, 1]]
[[tests]]
name = "no-unicode2"
options = []
pattern = '\w+'
input = "aδ"
matches = [[0, 3]]
[[tests]]
name = "no-unicode3"
options = ["no-unicode"]
pattern = '\d+'
input = "1२३9"
matches = [[0, 1]]
[[tests]]
name = "no-unicode4"
pattern = '\d+'
input = "1२३9"
matches = [[0, 8]]
[[tests]]
name = "no-unicode5"
options = ["no-unicode"]
pattern = '\s+'
input = " \u1680"
matches = [[0, 1]]
[[tests]]
name = "no-unicode6"
pattern = '\s+'
input = " \u1680"
matches = [[0, 4]]
[[tests]]
# See: https://github.com/rust-lang/regex/issues/484
name = "no-unicode-iter1"
pattern = ''
input = "☃"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[tests]]
# See: https://github.com/rust-lang/regex/issues/484
options = ['escaped']
name = "no-unicode-iter2"
pattern = ''
input = 'b\xFFr'
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]

View file

@ -0,0 +1,489 @@
[[tests]]
name = "unicode-literal1"
pattern = '☃'
input = "☃"
matches = [[0, 3]]
[[tests]]
name = "unicode-literal2"
pattern = '☃+'
input = "☃"
matches = [[0, 3]]
[[tests]]
name = "unicode-literal3"
options = ["case-insensitive"]
pattern = '☃+'
input = "☃"
matches = [[0, 3]]
[[tests]]
name = "unicode-literal4"
options = ["case-insensitive"]
pattern = 'Δ'
input = "δ"
matches = [[0, 2]]
[[tests]]
name = "unicode-class1"
pattern = '[☃Ⅰ]+'
input = "☃"
matches = [[0, 3]]
[[tests]]
# `\pN` must match one numeric character. The input is U+2160 ROMAN NUMERAL
# ONE (category Nl), which is 3 bytes in UTF-8, hence the [0, 3] span. It is
# written as an escape so the character cannot be lost or confused with "I".
name = "unicode-class2"
pattern = '\pN'
input = "\u2160"
matches = [[0, 3]]
[[tests]]
# Input is U+2160, "1", U+2161, "2": two 3-byte Roman numerals (category Nl)
# interleaved with two ASCII digits, 3 + 1 + 3 + 1 = 8 bytes, all of which are
# in `\pN`, hence the single [0, 8] match.
name = "unicode-class3"
pattern = '\pN+'
input = "\u21601\u21612"
matches = [[0, 8]]
[[tests]]
# `\PN+` (non-numbers) must stop before the trailing U+2160 ROMAN NUMERAL ONE,
# so only the two ASCII letters are matched: [0, 2]. The trailing numeral is
# what makes the test meaningful; the same input is used by unicode-class6.
name = "unicode-class4"
pattern = '\PN+'
input = "ab\u2160"
matches = [[0, 2]]
[[tests]]
# Bracketed form of `\PN+`: the match must stop before the trailing U+2160
# ROMAN NUMERAL ONE, leaving only the two ASCII letters: [0, 2].
name = "unicode-class5"
pattern = '[\PN]+'
input = "ab\u2160"
matches = [[0, 2]]
[[tests]]
# `[^\PN]` is a double negation, i.e. "numbers only". It skips the two ASCII
# letters and matches the 3-byte U+2160 ROMAN NUMERAL ONE at bytes [2, 5].
name = "unicode-class6"
pattern = '[^\PN]+'
input = "ab\u2160"
matches = [[2, 5]]
[[tests]]
name = "unicode-class7"
pattern = '\p{Lu}+'
input = "ΛΘΓΔα"
matches = [[0, 8]]
[[tests]]
name = "unicode-class8"
options = ["case-insensitive"]
pattern = '\p{Lu}+'
input = "ΛΘΓΔα"
matches = [[0, 10]]
[[tests]]
name = "unicode-class9"
pattern = '\pL+'
input = "ΛΘΓΔα"
matches = [[0, 10]]
[[tests]]
name = "unicode-class10"
pattern = '\p{Ll}+'
input = "ΛΘΓΔα"
matches = [[8, 10]]
[[tests]]
name = "unicode-perl1"
pattern = '\w+'
input = "dδd"
matches = [[0, 4]]
[[tests]]
name = "unicode-perl2"
pattern = '\w+'
input = "⥡"
matches = []
[[tests]]
name = "unicode-perl3"
pattern = '\W+'
input = "⥡"
matches = [[0, 3]]
[[tests]]
name = "unicode-perl4"
pattern = '\d+'
input = "1२३9"
matches = [[0, 8]]
[[tests]]
name = "unicode-perl5"
pattern = '\d+'
input = "Ⅱ"
matches = []
[[tests]]
name = "unicode-perl6"
pattern = '\D+'
input = "Ⅱ"
matches = [[0, 3]]
[[tests]]
# U+1680 OGHAM SPACE MARK is Unicode whitespace and 3 bytes in UTF-8, so
# Unicode-aware `\s+` matches [0, 3]. Written as an escape because the
# character is easily lost or mistaken for a dash.
name = "unicode-perl7"
pattern = '\s+'
input = "\u1680"
matches = [[0, 3]]
[[tests]]
name = "unicode-perl8"
pattern = '\s+'
input = "☃"
matches = []
[[tests]]
name = "unicode-perl9"
pattern = '\S+'
input = "☃"
matches = [[0, 3]]
[[tests]]
# U+FF21 FULLWIDTH LATIN CAPITAL LETTER A: category Lu (so part of
# Cased_Letter) and 3 bytes in UTF-8, hence [0, 3].
name = "unicode-class-gencat1"
pattern = '\p{Cased_Letter}'
input = "\uFF21"
matches = [[0, 3]]
[[tests]]
# U+276F HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT: category Pe
# (Close_Punctuation), 3 bytes in UTF-8, hence [0, 3].
name = "unicode-class-gencat2"
pattern = '\p{Close_Punctuation}'
input = "\u276F"
matches = [[0, 3]]
[[tests]]
name = "unicode-class-gencat3"
pattern = '\p{Connector_Punctuation}'
input = "⁀"
matches = [[0, 3]]
[[tests]]
name = "unicode-class-gencat4"
pattern = '\p{Control}'
input = "\u009F"
matches = [[0, 2]]
[[tests]]
# U+FFE1 FULLWIDTH POUND SIGN: category Sc (Currency_Symbol), 3 bytes in
# UTF-8, hence [0, 3]. Written as an escape to keep it distinct from U+00A3
# POUND SIGN, which is only 2 bytes and would not produce this span.
name = "unicode-class-gencat5"
pattern = '\p{Currency_Symbol}'
input = "\uFFE1"
matches = [[0, 3]]
[[tests]]
name = "unicode-class-gencat6"
pattern = '\p{Dash_Punctuation}'
input = "〰"
matches = [[0, 3]]
[[tests]]
name = "unicode-class-gencat7"
pattern = '\p{Decimal_Number}'
input = "𑓙"
matches = [[0, 4]]
[[tests]]
name = "unicode-class-gencat8"
pattern = '\p{Enclosing_Mark}'
input = "\uA672"
matches = [[0, 3]]
[[tests]]
name = "unicode-class-gencat9"
pattern = '\p{Final_Punctuation}'
input = "⸡"
matches = [[0, 3]]
[[tests]]
name = "unicode-class-gencat10"
pattern = '\p{Format}'
input = "\U000E007F"
matches = [[0, 4]]
[[tests]]
name = "unicode-class-gencat11"
pattern = '\p{Initial_Punctuation}'
input = "⸜"
matches = [[0, 3]]
[[tests]]
name = "unicode-class-gencat12"
pattern = '\p{Letter}'
input = "Έ"
matches = [[0, 2]]
[[tests]]
name = "unicode-class-gencat13"
pattern = '\p{Letter_Number}'
input = "ↂ"
matches = [[0, 3]]
[[tests]]
name = "unicode-class-gencat14"
pattern = '\p{Line_Separator}'
input = "\u2028"
matches = [[0, 3]]
[[tests]]
name = "unicode-class-gencat15"
pattern = '\p{Lowercase_Letter}'
input = "ϛ"
matches = [[0, 2]]
[[tests]]
name = "unicode-class-gencat16"
pattern = '\p{Mark}'
input = "\U000E01EF"
matches = [[0, 4]]
[[tests]]
# U+22FF Z NOTATION BAG MEMBERSHIP: a mathematical operator (category Sm,
# Math property), 3 bytes in UTF-8, hence [0, 3].
name = "unicode-class-gencat17"
pattern = '\p{Math}'
input = "\u22FF"
matches = [[0, 3]]
[[tests]]
name = "unicode-class-gencat18"
pattern = '\p{Modifier_Letter}'
input = "𖭃"
matches = [[0, 4]]
[[tests]]
name = "unicode-class-gencat19"
pattern = '\p{Modifier_Symbol}'
input = "🏿"
matches = [[0, 4]]
[[tests]]
name = "unicode-class-gencat20"
pattern = '\p{Nonspacing_Mark}'
input = "\U0001E94A"
matches = [[0, 4]]
[[tests]]
name = "unicode-class-gencat21"
pattern = '\p{Number}'
input = "⓿"
matches = [[0, 3]]
[[tests]]
name = "unicode-class-gencat22"
pattern = '\p{Open_Punctuation}'
input = "⦅"
matches = [[0, 3]]
[[tests]]
name = "unicode-class-gencat23"
pattern = '\p{Other}'
input = "\u0BC9"
matches = [[0, 3]]
[[tests]]
name = "unicode-class-gencat24"
pattern = '\p{Other_Letter}'
input = "ꓷ"
matches = [[0, 3]]
[[tests]]
name = "unicode-class-gencat25"
pattern = '\p{Other_Number}'
input = "㉏"
matches = [[0, 3]]
[[tests]]
name = "unicode-class-gencat26"
pattern = '\p{Other_Punctuation}'
input = "𞥞"
matches = [[0, 4]]
[[tests]]
name = "unicode-class-gencat27"
pattern = '\p{Other_Symbol}'
input = "⅌"
matches = [[0, 3]]
[[tests]]
name = "unicode-class-gencat28"
pattern = '\p{Paragraph_Separator}'
input = "\u2029"
matches = [[0, 3]]
[[tests]]
name = "unicode-class-gencat29"
pattern = '\p{Private_Use}'
input = "\U0010FFFD"
matches = [[0, 4]]
[[tests]]
name = "unicode-class-gencat30"
pattern = '\p{Punctuation}'
input = "𑁍"
matches = [[0, 4]]
[[tests]]
name = "unicode-class-gencat31"
pattern = '\p{Separator}'
input = "\u3000"
matches = [[0, 3]]
[[tests]]
name = "unicode-class-gencat32"
pattern = '\p{Space_Separator}'
input = "\u205F"
matches = [[0, 3]]
[[tests]]
name = "unicode-class-gencat33"
pattern = '\p{Spacing_Mark}'
input = "\U00016F7E"
matches = [[0, 4]]
[[tests]]
name = "unicode-class-gencat34"
pattern = '\p{Symbol}'
input = "⯈"
matches = [[0, 3]]
[[tests]]
name = "unicode-class-gencat35"
pattern = '\p{Titlecase_Letter}'
input = "ῼ"
matches = [[0, 3]]
[[tests]]
name = "unicode-class-gencat36"
pattern = '\p{Unassigned}'
input = "\U0010FFFF"
matches = [[0, 4]]
[[tests]]
name = "unicode-class-gencat37"
pattern = '\p{Uppercase_Letter}'
input = "Ꝋ"
matches = [[0, 3]]
[[tests]]
name = "unicode-class-emoji1"
pattern = '\p{Emoji}'
input = "\u23E9"
matches = [[0, 3]]
[[tests]]
name = "unicode-class-emoji2"
pattern = '\p{emoji}'
input = "\U0001F21A"
matches = [[0, 4]]
[[tests]]
name = "unicode-class-emoji3"
pattern = '\p{extendedpictographic}'
input = "\U0001FA6E"
matches = [[0, 4]]
[[tests]]
name = "unicode-class-emoji4"
pattern = '\p{extendedpictographic}'
input = "\U0001FFFD"
matches = [[0, 4]]
[[tests]]
name = "unicode-class-gcb1"
pattern = '\p{grapheme_cluster_break=prepend}'
input = "\U00011D46"
matches = [[0, 4]]
[[tests]]
name = "unicode-class-gcb2"
pattern = '\p{gcb=regional_indicator}'
input = "\U0001F1E6"
matches = [[0, 4]]
[[tests]]
name = "unicode-class-gcb3"
pattern = '\p{gcb=ri}'
input = "\U0001F1E7"
matches = [[0, 4]]
[[tests]]
name = "unicode-class-gcb4"
pattern = '\p{regionalindicator}'
input = "\U0001F1FF"
matches = [[0, 4]]
[[tests]]
name = "unicode-class-gcb5"
pattern = '\p{gcb=lvt}'
input = "\uC989"
matches = [[0, 3]]
[[tests]]
name = "unicode-class-gcb6"
pattern = '\p{gcb=zwj}'
input = "\u200D"
matches = [[0, 3]]
[[tests]]
name = "unicode-class-word-break1"
pattern = '\p{word_break=Hebrew_Letter}'
input = "\uFB46"
matches = [[0, 3]]
[[tests]]
name = "unicode-class-word-break2"
pattern = '\p{wb=hebrewletter}'
input = "\uFB46"
matches = [[0, 3]]
[[tests]]
name = "unicode-class-word-break3"
pattern = '\p{wb=ExtendNumLet}'
input = "\uFF3F"
matches = [[0, 3]]
[[tests]]
name = "unicode-class-word-break4"
pattern = '\p{wb=WSegSpace}'
input = "\u3000"
matches = [[0, 3]]
[[tests]]
name = "unicode-class-word-break5"
pattern = '\p{wb=numeric}'
input = "\U0001E950"
matches = [[0, 4]]
[[tests]]
name = "unicode-class-sentence-break1"
pattern = '\p{sentence_break=Lower}'
input = "\u0469"
matches = [[0, 2]]
[[tests]]
name = "unicode-class-sentence-break2"
pattern = '\p{sb=lower}'
input = "\u0469"
matches = [[0, 2]]
[[tests]]
name = "unicode-class-sentence-break3"
pattern = '\p{sb=Close}'
input = "\uFF60"
matches = [[0, 3]]
[[tests]]
name = "unicode-class-sentence-break4"
pattern = '\p{sb=Close}'
input = "\U0001F677"
matches = [[0, 4]]
[[tests]]
name = "unicode-class-sentence-break5"
pattern = '\p{sb=SContinue}'
input = "\uFF64"
matches = [[0, 3]]