python正则表达式学习代码，python正则表达式,Regexp.py

文章由Byrx.net分享于2019-03-23 11:03:37评论（33）
python正则表达式学习代码，python正则表达式,Regexp.py

Regexp.py
# -*- coding: utf-8 -*-
"""Worked examples of Python's ``re`` module.

Created on 2012-11-28
@author: jiangxiaoqiang

Running this file as a script calls ``regexpTest``, which prints a heading,
runs one regular-expression example and prints its result, section by
section.
"""
import re

# Printed after each demo section to visually separate the output.
_SEPARATOR = "-------------------------------------"


def regexpTest():
    """Run every regular-expression demo and print the results to stdout.

    The demos cover ``re.search``, ``re.findall`` (with and without groups),
    the ``re.IGNORECASE`` flag, lookahead / lookbehind assertions, greedy
    versus non-greedy quantifiers, and ``re.sub`` with group references.

    Returns:
        None. All results are written with ``print``.
    """
    # match = re.search(pat, mystr)
    # All of the pattern must be matched, but not all of the string.
    print("re.search()：")
    mystr = 'an example word:cat!!'
    # The r prefix makes a raw string, so backslashes are not escaped.
    match = re.search(r'word:\w\w\w', mystr)
    if match:
        print('Found: ' + match.group())
    else:
        print("Didn't found!")
    print(_SEPARATOR)

    # E-mail validation example.
    print("邮箱验证：")
    # When validating a whole address, ^ and $ must not be omitted;
    # otherwise a valid-looking substring would be enough to match.
    email_regexp = r'^[\w-]+(\.[\w-]+)*@([\w-]+\.)+[a-zA-Z]+$'
    email = 'feichexia@yahoo.com.cn'
    email_match = re.search(email_regexp, email)
    if email_match:
        print(email, 'is valid!')
    else:
        print(email, 'is not valid!')
    print(_SEPARATOR)

    # re.findall() returns every matching substring of the string.
    print("re.findall()：")
    email_regexp = r'[\w\.-]+@[\w\.-]+'
    email_str = 'purple alice@google.com， blah monkey bob@abc.com blah dishwasher'
    email_matches = re.findall(email_regexp, email_str)
    print(email_matches)
    for address in email_matches:
        print(address)
    print(_SEPARATOR)

    # email_matches above is ['alice@google.com', 'bob@abc.com'],
    # i.e. a list of strings.
    # The next example looks like the same pattern on the same string, yet
    # the result is different: the pattern is not really the same, because
    # it now contains parentheses.
    print('re.findall()：元组')
    mystr = 'purple alice@google.com， blah monkey bob@abc.com blah dishwasher'
    tuples = re.findall(r'([\w\.-]+)@([\w\.-]+)', mystr)
    # [('alice', 'google.com'), ('bob', 'abc.com')]
    # A list of tuples; each tuple holds group(1), group(2), ... of a match.
    print(tuples)
    for t in tuples:
        print('用户名:' + str(t[0]))
        print('Host: ' + str(t[1]))
    print(_SEPARATOR)

    # The difference is the parentheses: in a regular expression they
    # create groups, and findall() returns the groups when any exist.

    # Matching options (flags) include:
    # re.IGNORECASE
    #    ignore upper/lower case differences.
    # re.MULTILINE
    #    Within a string made of many lines, allow ^ and $ to match the
    #    start and end of each line. Normally ^/$ would just match the
    #    start and end of the whole string.
    # re.DOTALL
    #    allow dot (.) to match newline --
    #    normally it matches anything but newline.
    #    This can trip you up -- you think .* matches everything,
    #    but by default it does not go past the end of a line.
    #    Note that \s (whitespace) includes newlines,
    #    so if you want to match a run of whitespace that may
    #    include a newline, you can just use \s*
    print("带额外匹配选项的正则匹配：")
    multistr = '''Foo None what bar
    Not know universe
    True nong'''
    regexp = r'no'
    matches = re.findall(regexp, multistr, re.IGNORECASE)
    print("共匹配个数：" + str(len(matches)))
    print(_SEPARATOR)

    print("高级匹配规则之正向预搜索与反向预搜索：")
    print("正向预搜索（或者正向预查），后面必须是：")
    str2 = 'none know no'
    # Matches 'no' only when it is followed by 'w'; the text inside the
    # lookahead is not captured.
    regexp2 = r'no(?=w)'
    matches2 = re.findall(regexp2, str2)
    for m in matches2:
        print(m)
    print(_SEPARATOR)

    print('正向预搜索（或者正向预查），后面必须不是：')
    # Matches 'no' only when it is NOT followed by 'w'; the text inside the
    # lookahead is not captured.
    regexp3 = r'no(?!w)'
    matches3 = re.findall(regexp3, str2)
    for m in matches3:
        print(m)
    print(_SEPARATOR)

    print('负向预搜索（或者负向预查），前面必须是：')
    # Matches 'no' only when it is preceded by 'k'; the text inside the
    # lookbehind is not captured.
    regexp4 = r'(?<=k)no'
    matches4 = re.findall(regexp4, str2)
    for m in matches4:
        print(m)
    print(_SEPARATOR)

    print('负向预搜索（或者负向预查），前面必须不是：')
    # Matches 'no' only when it is NOT preceded by 'k'; the text inside the
    # lookbehind is not captured.
    regexp5 = r'(?<!k)no'
    matches5 = re.findall(regexp5, str2)
    for m in matches5:
        print(m)
    print(_SEPARATOR)

    # Greedy versus non-greedy matching.
    print("高级匹配规则之贪婪匹配与非贪婪匹配：")
    # Paraphrased from the regular-expression section of Google's Python
    # class:
    #
    # Suppose you want to match each HTML tag in
    #     <b>foo</b> and <i>so on</i>
    # and you try the pattern '(<.*>)'. The result is a little surprising:
    # because .* is greedy it runs as far as it can, so the single match is
    # the whole string '<b>foo</b> and <i>so on</i>'.
    # Adding ? after the quantifier (.*? or .+?) makes it non-greedy, so
    # '(<.*?>)' matches '<b>' first, then '</b>', and so on: after finding
    # '<' it stops at the nearest following '>'.
    # The *? syntax originates from Perl; regular-expression flavours that
    # support Perl's syntax are called Perl-compatible.
    # An alternative that avoids relying on non-greedy matching is a negated
    # character class, e.g. '(<[^>]*?>)' -- also a classic technique.
    print("非贪婪匹配做法：")
    html_str = '<b>foo</b> and <i>bar</i>'
    regex_tag = r'<.*?>'
    tags = re.findall(regex_tag, html_str)
    for t in tags:
        print(t)
    print(_SEPARATOR)

    print('中括号做法：')
    regex_tag2 = r'<[^>]*?>'
    tags2 = re.findall(regex_tag2, html_str)
    for t in tags2:
        print(t)
    print(_SEPARATOR)

    # re.sub(pattern, replacement, str)
    # Every substring of str that matches pattern is replaced by replacement.
    # replacement may contain '\1', '\2', ... which refer to group(1),
    # group(2), ... of the match.
    print("re.sub()用法：")
    print(re.sub(r'(\w+), (\w+)', r'\2, \1', "John, Smith"))  # Smith, John


def main():
    """Script entry point: run the regular-expression demos."""
    regexpTest()


if __name__ == '__main__':
    main()
热门文章：
python正则表达式学习代码，python正则表达式,Regexp.py

python正则表达式学习代码，python正则表达式,Regexp.py

相关内容

最新python源码实例

python~HOT