python就业班----正则表达式及re应用

    科技2022-09-03  112

    """Regular expressions and the re module: annotated tutorial examples.

    Most examples are kept as commented-out snippets, so importing or running
    this file only executes the final re.split() demonstration at the bottom.
    Sample strings such as "速度与激情" are intentionally left as they are:
    they are the data the patterns are matched against.
    """
    import re

    # ---- Basic workflow ----
    # 1. Import the re module:
    # import re
    #
    # 2. Use match() to try a pattern against a string:
    # result=re.match(pattern, string_to_match)
    #
    # 3. If the previous step matched, use group() to extract the matched text:
    # result.group()

    # ---- re module example (match text that starts with "itcast") ----
    # import re
    # result=re.match(r"itcast","itcast.cn")
    # print(result)  # on success: <re.Match object; span=(0, 6), match='itcast'>; otherwise None
    # print(result.group())  # extract the matched text

    # ---- Matching a single character ----
    # import re
    # result=re.match(r"速度与激情1","速度与激情1")
    # print(result)
    # print(result.group())
    #
    # # \d matches a digit; one \d stands for exactly one digit
    # result2=re.match(r"速度与激情\d","速度与激情2")
    # print(result2)
    # print(result2.group())
    #
    # result3=re.match(r"速度与激情\d","速度与激情8")
    # print(result3)
    # print(result3.group())
    #
    # # [...] matches any one of the listed characters; [1-36-8] is the ranges 1-3 and 6-8
    # print(re.match(r"速度与激情[12345678]","速度与激情8").group())
    # print(re.match(r"速度与激情[12345678]","速度与激情6").group())
    # print(re.match(r"速度与激情[1-36-8]","速度与激情6").group())
    # print(re.match(r"速度与激情[1-36-8]","速度与激情7").group())
    #
    # # Matching digits and letters
    # print(re.match(r"速度与激情[1-8abcd]","速度与激情5").group())
    # print(re.match(r"速度与激情[1-8abcd]","速度与激情a").group())
    # # print(re.match(r"速度与激情[1-8abcd]","速度与激情e").group())  # does not match
    # print(re.match(r"速度与激情[1-8a-zA-Z]","速度与激情F").group())
    #
    # # \w matches a word character (a-z, A-Z, 0-9 and _) and also word
    # # characters of other languages, so use it with care
    # print(re.match(r"速度与激情\w","速度与激情F").group())
    # print(re.match(r"速度与激情\w","速度与激情a").group())
    # print(re.match(r"速度与激情\w","速度与激情1").group())
    # print(re.match(r"速度与激情\w","速度与激情0").group())
    #
    # # \s matches whitespace
    # print(re.match(r"速度与激情 \d","速度与激情 1").group())
    # print(re.match(r"速度与激情\s\d","速度与激情\t1").group())
    #
    # # \D: matches a non-digit
    # # \S: matches a non-whitespace character
    # # \W: matches a non-word character
    # # . : matches any single character
    # print(re.match(r"速度与激情.","速度与激情1").group())
    # print(re.match(r"速度与激情.","速度与激情a").group())
    # print(re.match(r"速度与激情.","速度与激情!").group())

    # ---- Matching multiple characters ----
    # {m,n}: the preceding item occurs between m and n times
    # print(re.match(r"速度与激情\d{1,2}","速度与激情12").group())
    # print(re.match(r"速度与激情\d{1,2}","速度与激情1").group())  # {} bounds the repeat count; {1,2}: one or two digits both match
    # print(re.match(r"速度与激情\d{1,3}","速度与激情123").group())  # {1,3}: one, two or three digits all match
    # print(re.match(r"速度与激情\d{3}","速度与激情123").group())  # {3}: exactly three digits are required
    # # print(re.match(r"速度与激情\d{3}","速度与激情13").group())  # {3} with only two digits: match() returns None, so .group() raises AttributeError

    # ?: the preceding item occurs 0 or 1 times
    # print(re.match(r"021-\d{8}","021-12345678").group())
    # print(re.match(r"021-?\d{8}","021-12345678").group())
    # print(re.match(r"\d{3,4}-?\d{8}","0186-12345678").group())

    # html_content="""fgdhjh
    # asdfghjk
    # wertyuio
    # wertyuio
    # werfghuiop'
    # sdrtyj uiojhh
    # wertyui'
    # wqertyui
    # yfuio
    # thjk"""
    # print(html_content)

    # . : matches any single character except the newline \n
    # print(re.match(r".*","asd").group())
    # print(re.match(r".*","a1").group())
    # print(re.match(r".*","").group())
    #
    # # *: the preceding item occurs any number of times, including zero
    # print(re.match(r".*",html_content).group())
    #
    # # re.S: makes . match \n as well
    # print(re.match(r".*",html_content,re.S).group())

    # + : the preceding item occurs one or more times, i.e. at least once
    # print(re.match(r".+","f").group())
    # print(re.match(r".+","fty").group())
    # print(re.match(r".+","fasdfgh").group())
    # # print(re.match(r".+","").group())  # fails: no match, so .group() raises AttributeError

    # ---- Task: check whether a variable name is valid ----
    # names=["name1","_name","2_name","__name__"]
    #
    # for name in names:
    #     # \w matches a word character (a-z, A-Z, 0-9 and _) and also word
    #     # characters of other languages
    #     ret=re.match(r"[a-zA-Z_]+[\w]*",name)
    #     if ret:
    #         print("变量名%s符合要求:"%ret.group())
    #     else:
    #         print("变量名%s非法"%name)

    # import re
    #
    # def main():
    #     names=["age","_age","1age","age1","a_age","age_1_","age!","a#123","_____"]
    #     for name in names:
    #         ret=re.match(r"[a-zA-Z_][a-zA-Z0-9_]*",name)  # match() anchors only at the start of the string, not at the end
    #         if ret:
    #             print("变量名:%s符合要求,通过正则匹配出来的是%s" %(name,ret.group()))
    #         else:
    #             print("变量名%s非法" % name)
    #
    #
    # if __name__=="__main__":
    #     main()

    # ---- Anchors: ^ asserts the start of the string, $ asserts the end ----
    # import re
    #
    # def main():
    #     names=["age","_age","1age","age1","a_age","age_1_","age!","a#123","_____"]
    #     for name in names:
    #         # $ anchors the pattern at the end of the string
    #         ret=re.match(r"^[a-zA-Z_][a-zA-Z0-9_]*$",name)
    #         if ret:
    #             print("变量名:%s符合要求,通过正则匹配出来的是%s" %(name,ret.group()))
    #         else:
    #             print("变量名%s非法" % name)
    #
    #
    # if __name__=="__main__":
    #     main()

    # ---- Matching a 163.com e-mail address ----
    # import re
    #
    # def main():
    #     email=input("请输入一个邮箱地址:")
    #     # To match a metacharacter such as . or ? literally, put a backslash in front of it
    #     ret=re.match(r"[a-zA-Z_0-9]{4,20}@163\.com$",email)
    #     if ret:
    #         print("%s符合要求---"%email)
    #     else:
    #         print("%s不符合要求---"%email)
    #
    #
    # if __name__=="__main__":
    #     main()

    # ---- Groups ----
    # import re
    # Validate addresses from several e-mail providers.
    #
    # |          : matches either the expression on its left or the one on its right
    # (ab)       : treats the characters inside the parentheses as one group
    # \num       : back-reference to the text matched by group number num
    # (?P<name>) : gives the group an alias
    # (?P=name)  : back-reference to the text matched by the group aliased name
    #
    # print(re.match(r"[a-zA-Z_0-9]{4,20}@(163|126|qq|google)\.com$","laowang@163.com").group())
    # print(re.match(r"[a-zA-Z_0-9]{4,20}@(163|126)\.com$","laowang@126.com").group())
    #
    # # To extract part of a successful match, wrap that part in parentheses and read the group
    # print(re.match(r"[a-zA-Z_0-9]{4,20}@(163|126)\.com$","laowang@126.com").group(1))  # 126
    # print(re.match(r"([a-zA-Z_0-9]{4,20})@(163|126)\.com$","laowang@126.com").group(1))  # laowang
    # print(re.match(r"([a-zA-Z_0-9]{4,20})@(163|126)\.com$","laowang@126.com").group(2))  # 126

    # \num: back-reference to the text matched by group number num
    # import re
    # html_str="<h1>hahahaha</h1>"
    # print(re.match(r"<(\w*)>.*</\1>",html_str).group())
    # html_str="<body><h1>hahhahah</h1></body>"
    # print(re.match(r"<(\w*)><(\w*)>.*</\2></\1>",html_str).group())
    # print(re.match(r"<(?P<p1>\w*)><(?P<p2>\w*)>.*</(?P=p2)></(?P=p1)>",html_str).group())

    # ---- Other re functions (all part of the standard library) ----
    # 1. search: is not anchored at the start of the string; it succeeds as
    #    soon as the pattern matches somewhere in the string
    # Task: extract an article's read count
    # import re
    # print(re.search(r"\d+","阅读的次数为000099999").group())
    # print(re.search(r"\d+","阅读的次数为99999,点赞数为:100").group())

    # 2. findall: returns a list containing the matched strings
    # Task: collect the read counts of the python, c and c++ articles
    # import re
    #
    # ret=re.findall(r"\d+","python=9999,c=7890,c++=12345")
    # print(ret)  # ['9999', '7890', '12345']

    # 3. sub: replaces the matched text
    # import re
    # print(re.sub(r"\d+",'998',"python=997"))  # python=998
    # print(re.sub(r"\d+",'998',"python=997,c++=1024"))  # python=998,c++=998

    # ---- sub also accepts a function as the replacement ----
    # import re
    #
    # def add(temp):
    #     strNum=temp.group()
    #     num=int(strNum)+1
    #     return str(num)
    #
    # print(re.sub(r"\d+",add,'python=997'))  # python=998
    #
    # print(re.sub(r"\d+",add,'python=99'))  # python=100

    # ---- split: cut a string wherever the pattern matches; returns a list ----
    # The pattern ":| " splits on either a colon or a single space.
    print(re.split(r":| ","info:xiaoming 33 shandong"))  # ['info', 'xiaoming', '33', 'shandong']
    Processed: 0.012, SQL: 9