Python Learning Code: Advanced


The code below can be copied directly into a Python file and run.

# 1. File operations
# Built-in functions and methods:
# open()      open a file
# read()      read the contents
# readline()  read one line
# seek()      move the file position
# write()     write to the file
# close()     close the file
# (an idiomatic with-block variant is sketched after section 9)

# Write a file; a txt file is created once this runs
file1 = open('name.txt', 'w')
file1.write("20200202")
file1.close()

# Read the whole file
file2 = open('name.txt')
content1 = file2.read()  # renamed from `str`, which shadowed the built-in
print(content1)
file2.close()

# Append to the file
file3 = open('name.txt', 'a')
# A \n inside the string starts a new line
file3.write("\n11111")
file3.close()

# Read a single line
file4 = open('name.txt')
print(file4.readline())
file4.close()

# Read line by line
file5 = open('name.txt')
for str_1 in file5.readlines():
    print(str_1)
file5.close()

# After a read, the file position sits past what was read
file6 = open('name.txt')
print(file6.readline())
# seek(0) moves back to the start of the file
print(file6.seek(0))
file6.close()

# 2. Detecting and handling exceptions
try:
    a = 1 / 0
except Exception as e:
    print('Caught exception: %s' % e)
finally:
    print('This statement always runs')

# 3. Variable-length arguments
def howLong(first, *other):
    print(first)
    print(other)

howLong('123', '1222', '1111')

# 4. Iterators and generators
list1 = [1, 2, 3]  # the original used {1, 2, 3}, a set literal; a list keeps the order explicit
it = iter(list1)
# Advance the iterator with next()
print(next(it))
print(next(it))
print(next(it))

def frange(start, stop, step):
    x = start
    while x < stop:
        # the yield keyword makes this function a generator
        yield x
        x += step

for i in frange(10, 12, 0.5):
    print(i)

# 5. Lambda expressions: anonymous functions
add = lambda x, y: x + y
print(add(2, 4))

# 6. Built-in functions: filter, map, reduce, zip
a = [1, 2, 34, 5, 6]
# filter(): keep the numbers in a that are greater than 2
print(list(filter(lambda x: x > 2, a)))
# map(): add one to each number in a
print(list(map(lambda x: x + 1, a)))
# Multiple lists: add the elements of a and b pairwise (map stops at the shorter list)
b = [3, 4, 5, 9]
print(list(map(lambda x, y: x + y, a, b)))
# reduce() must be imported; here it accumulates a sum, starting from 4
from functools import reduce
print(reduce(lambda x, y: x + y, [1, 2, 3], 4))
# zip() pairs values with keys, here effectively inverting the dict
dicta = {'aa': 'a', 'bb': 'b', 'cc': 'c'}
dictc = zip(dicta.values(), dicta.keys())
print(list(dictc))

# 7. Closures: nested functions
def sum(a):  # note: this shadows the built-in sum()
    def add(b):
        return a + b
    return add

num27 = sum(2)
print(num27(4))

# 8. Multithreading
import threading
from threading import current_thread

class Mythread(threading.Thread):
    def run(self):
        print(current_thread().getName(), 'start')
        print('run')
        print(current_thread().getName(), 'end')

t1 = Mythread()
t1.start()
t1.join()  # wait for the thread to finish (thread synchronization)
print(current_thread().getName(), 'end')

# 9. Regular expressions (re)
# .      match any single character
# ^      match the start of the string
# $      match the end of the string (matching works backwards from the end)
# *      previous character appears 0 to n times
# +      previous character appears 1 to n times
# ?      previous character appears 0 or 1 times
# {m}    previous character appears exactly m times
# {m,n}  previous character appears m to n times
# []     match any single character listed inside the brackets
# |      match either the left or the right alternative
# \d     match a digit
# \D     match a non-digit
# \s     match a whitespace character
# ()     group
import re

p = re.compile('.{3}')  # any character, three times
print(p.match('d'))
p1 = re.compile('jpg$')  # strings ending in jpg
print(p1.match('d'))
p2 = re.compile('ca*')  # c followed by zero or more a's
print(p2.match('cat'))
p3 = re.compile('a{4}')  # a repeated exactly four times
print(p3.match('caaaat'))
p4 = re.compile('c[bcd]t')  # any one of b, c, d in the middle
print(p4.match('cat'))

# Grouping
p5 = re.compile(r'(\d+)-(\d+)-(\d+)')
print(p5.match('2019-02-02'))  # match a date
print(p5.match('2019-02-02').group(1))  # the year
year, month, day = p5.match('2019-02-02').groups()  # all three groups at once
print(year, month, day)

# match() anchors at the start of the string; search() scans the whole string
print(p5.match('aaa2019-02-02'))
print(p5.search('aaa2019-02-02'))

# sub() replaces matches
phone = '123-456-789 # this is a phone number'
print(re.sub(r'#.*$', '', phone))  # strip the hash comment from the end
print(re.sub(r'\D', '', phone))  # remove all non-digits
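# Side note (my addition, not in the original): the date pattern above can
# also use named groups, which makes group access self-documenting.
p6 = re.compile(r'(?P<year>\d+)-(?P<month>\d+)-(?P<day>\d+)')
m6 = p6.match('2019-02-02')
print(m6.group('year'), m6.group('month'), m6.group('day'))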
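# Side note (my addition): the files in section 1 are closed by hand; the
# idiomatic alternative is a with-block, which closes the file automatically
# even if an exception is raised inside it.
with open('name.txt') as f_note:
    for line_note in f_note:
        print(line_note.rstrip('\n'))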
# 10. Date and time libraries
import time
print(time.time())  # seconds since 1970 (the epoch)
print(time.localtime())
print(time.strftime('%Y-%m-%d %H:%M:%S'))

import datetime
# datetime is used for date arithmetic
print(datetime.datetime.now())
new_time = datetime.timedelta(minutes=10)
print(datetime.datetime.now() + new_time)  # the time ten minutes from now
one_day = datetime.datetime(2019, 9, 9)
new_day = datetime.timedelta(days=10)
print(one_day + new_day)

# 11. Collecting web data with urllib
from urllib import request

url = 'http://www.baidu.com'
response = request.urlopen(url, timeout=1)
# print(response.read().decode('utf-8'))

# 12. GET and POST requests
from urllib import parse
from urllib import request

data = bytes(parse.urlencode({'world': 'hello'}), encoding='utf8')
# print(data)
response = request.urlopen('http://httpbin.org/post', data=data)
# print(response.read().decode('utf-8'))

import urllib.error
import socket

try:
    response2 = request.urlopen('http://httpbin.org/get', timeout=1)
    # print(response2.read())
except urllib.error.URLError as e:
    if isinstance(e.reason, socket.timeout):
        print("time out")

# 13. Using the requests library
# GET request
import requests

url2131 = 'http://httpbin.org/get'
data2131 = {'key': 'value', 'abc': 'xyz'}
response2131 = requests.get(url2131, data2131)  # the second argument becomes the query string
# print(response2131.text)

# POST request
url2132 = 'http://httpbin.org/post'
data2132 = {'key': 'value', 'abc': 'xyz'}
response2132 = requests.post(url2132, data2132)
# print(response2132.json())

# 14. Scraping links with regular expressions
# import requests  (already imported above)
# import re        (already imported above)
content = requests.get('http://www.cnu.cc/discoveryPage/hot-人像').text
# print(content)
patter2141 = re.compile(r'<a href="(.*?)".*?title">(.*?)</div>', re.S)
results2141 = re.findall(patter2141, content)
# print('ssssss', results2141)
for result2141 in results2141:
    url2141, name2141 = result2141
    # print(url2141, re.sub(r'\s', '', name2141))

# 15. Installing and using Beautiful Soup
# pip3 install bs4
from bs4 import BeautifulSoup

soup = BeautifulSoup(content, 'lxml')
# print(soup.prettify())  # pretty-print the document
# print(soup.title)  # the <title> tag
# print(soup.title.string)  # the title text
# print(soup.p)  # the first <p> tag
# print(soup.a)  # the first <a> tag
# print(soup.find(id='link3'))  # the tag with id="link3"
# Print the href of every <a> tag:
# for link in soup.find_all('a'):
#     print(link.get('href'))
# print(soup.get_text())  # all text in the document

# 16. Scraping page titles
# from bs4 import BeautifulSoup  (already imported above)
# import requests                (already imported above)
headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "Accept-Language": "zh-CN,zh;q=0.8",
    "Connection": "close",
    "Cookie": "_gauges_unique_hour=1; _gauges_unique_day=1; _gauges_unique_month=1; _gauges_unique_year=1; _gauges_unique=1",
    "Referer": "http://www.infoq.com",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36 LBBROWSER"
}
url2161 = 'https://www.infoq.com/news/'

# Fetch the full page content
def craw(url2162):
    response2162 = requests.get(url2162, headers=headers)
    print(response2162.text)

# craw(url2161)

# Fetch the news titles
def craw2(url2163):
    response2163 = requests.get(url2163, headers=headers)
    soup2163 = BeautifulSoup(response2163.text, 'lxml')
    for title_href in soup2163.find_all('div', class_='items__content'):
        print([title.get('title')
               for title in title_href.find_all('a') if title.get('title')])

# craw2(url2161)

# # Paging
# for i in range(15, 46, 15):
#     url2164 = 'http://www.infoq.com/news/' + str(i)
#     # print(url2164)
#     craw2(url2164)
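# Side note (my addition): BeautifulSoup also accepts CSS selectors through
# select(), which can replace nested find_all() calls like the ones in craw2()
# above. A self-contained sketch on a literal HTML snippet, reusing the
# items__content class name from this section:
html_note = '<div class="items__content"><a href="/x" title="Demo title">Demo</a></div>'
soup_note = BeautifulSoup(html_note, 'html.parser')  # html.parser needs no extra install
for a_note in soup_note.select('div.items__content a[title]'):
    print(a_note.get('title'))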
"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",    "Accept-Language": "zh-CN,zh;q=0.8",    "Connection": "close",    "Cookie": "_gauges_unique_hour=1; _gauges_unique_day=1; _gauges_unique_month=1; _gauges_unique_year=1; _gauges_unique=1",    "Referer": "http://www.infoq.com",    "Upgrade-Insecure-Requests": "1",    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36 LBBROWSER"}url = ‘http://www.infoq.com/presentations‘# 下载图片# Requests 库封装复杂的接口,提供更人性化的 HTTP 客户端,但不直接提供下载文件的函数。# 需要通过为请求设置特殊参数 stream 来实现。当 stream 设为 True 时,# 上述请求只下载HTTP响应头,并保持连接处于打开状态,# 直到访问 Response.content 属性时才开始下载响应主体内容def download_jpg(image_url, image_localpath):    response = requests.get(image_url, stream=True)    if response.status_code == 200:        with open(image_localpath, ‘wb‘) as f:            response.raw.deconde_content = True            shutil.copyfileobj(response.raw, f)# 取得演讲图片def craw3(url):    response = requests.get(url, headers=headers)    soup = BeautifulSoup(response.text, ‘lxml‘)    for pic_href in soup.find_all(‘div‘, class_=‘items__content‘):        for pic in pic_href.find_all(‘img‘):            imgurl = pic.get(‘src‘)            dir = os.path.abspath(‘.‘)            filename = os.path.basename(imgurl)            imgpath = os.path.join(dir, filename)            print(‘开始下载 %s‘ % imgurl)            download_jpg(imgurl, imgpath)# craw3(url)# 翻页j = 0for i in range(12, 37, 12):    url = ‘http://www.infoq.com/presentations‘ + str(i)    j += 1    print(‘第 %d 页‘ % j)    craw3(url)
