#match方法 import re content = "Hello 123 4567 World_This is a Regex Demo" print(len(content)) result = re.match('^Hello\s\d\d\d\s\d{4}\s\w{10}',content) print(result) print(result.group()) #正则所匹配的内容,匹配到Hello 123 4567 World_This print(result.span())
匹配目标
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
#匹配目标 import re content = "Hello 1234567 World_This is a Regex Demo" result = re.match('^Hello\s(\d+)\sWorld',content) #(\d+)表示匹配多个数字 print(result) print(result.group(0)) print(result.span())
#group的表达方式 ()正则把字符串分为三组 import re a = "123abc456" print re.search("([0-9]*)([a-z]*)([0-9]*)",a).group(0) #123abc456,返回整体 print re.search("([0-9]*)([a-z]*)([0-9]*)",a).group(1) #123 print re.search("([0-9]*)([a-z]*)([0-9]*)",a).group(2) #abc print re.search("([0-9]*)([a-z]*)([0-9]*)",a).group(3) #456
通用匹配
1 2 3 4 5 6 7
#通用匹配 import re content = "Hello 1234567 World_This is a Regex Demo" result = re.match('^Hello.*Demo$',content) #.*匹配任意,除了换行 print(result) print(result.group()) print(result.span())
#贪婪 import re content = "Hello 1234567 World_This is a Regex Demo" result = re.match('^He.*(\d+).*Demo$',content) #.*贪婪的匹配到了123456 print(result) print(result.group(1)) # #输出为7 print(result.span())
#匪贪婪 import re content = "Hello 1234567 World_This is a Regex Demo" result = re.match('^He.*?(\d+).*Demo$',content) #.*贪婪的匹配到了123456 print(result) print(result.group(1)) #输出为1234567 .*?匹配0到1个 print(result.span())
#非贪婪 import re content = "http://127.0.0.1/lala/1.txt" result = re.match('^http.*?lala/(.*?)',content) #(.*?)没有进行匹配,因为非贪婪尽可能少的匹配 print(result) print(result.group()) #http://127.0.0.1/lala/ print(result.span())
修饰符
1 2 3 4 5 6 7 8
#修饰符 import re content = '''Hello 1234567 World_This is a Regex Demo''' result = re.match('^He.*?(\d+).*Demo$',content,re.S) #添加re.S就可以匹配换行 print(result) print(result.group()) print(result.span())
转义字符
1 2 3 4 5 6 7
#转义字符 import re from unittest import result
content = '(lala)www.lala.com' result = re.match('\(lala\)www\.lala\.com', content) #加反斜杠匹配.()这几个字符 print(result)
search
search会一次扫描字符串,直到扫描到匹配的内容,如果扫描完还没有找到,就返回None
1 2 3 4 5 6 7 8 9
#search import re
content = 'Extra stings Hello 1234567 World_This is a Regex Demo Extra stings' result = re.search('Hello.*?(\d+).*?Demo', content) #result = re.search('Hello.*?(\d+).*?Demo', content) print(result) #因为match从字符串开始匹配,所以一开头就匹配失败,输出为none #search输出为 <re.Match object; span=(13, 53), match='Hello 1234567 World_This is a Regex Demo'>