python 字符问题'utf8' codec can't decode byte 0xe6 in position 0,utf80xe6,我用sphinx和dja
python 字符问题'utf8' codec can't decode byte 0xe6 in position 0,utf80xe6,我用sphinx和dja
我用sphinx和django做搜索,但是搜索的时候出现这种错误:
'utf8' codec can't decode byte 0xe6 in position 0: unexpected end of data
出错信息:
Environment:

Request Method: GET
Request URL: http://www.zhima.so:233/search/%E6%89%93%E7%AE%97/

Django Version: 1.8.1
Python Version: 2.7.3
Installed Applications:
('django.contrib.admin', 'django.contrib.auth', 'django.contrib.contenttypes', 'django.contrib.sessions', 'django.contrib.messages', 'django.contrib.staticfiles', 'search', 'web')
Installed Middleware:
('django.middleware.common.CommonMiddleware', 'django.contrib.sessions.middleware.SessionMiddleware', 'django.middleware.csrf.CsrfViewMiddleware', 'django.contrib.auth.middleware.AuthenticationMiddleware', 'django.contrib.messages.middleware.MessageMiddleware', 'django.middleware.clickjacking.XFrameOptionsMiddleware', 'search.timermiddleware.TimerMiddleware')

Traceback:
File "/usr/local/lib/python2.7/dist-packages/django/core/handlers/base.py" in get_response
  132. response = wrapped_callback(request, *callback_args, **callback_kwargs)
File "/usr/local/lib/python2.7/dist-packages/django/utils/decorators.py" in _wrapped_view
  110. response = view_func(request, *args, **kwargs)
File "/root/ssbc/web/views.py" in search
  69. keyword.decode('utf-8'), d['offset'], d['ps'], d['category'], d['sort'])
File "/root/ssbc/search/models.py" in search
  26. res = q.ask(subqueries=[q2])
File "/usr/local/lib/python2.7/dist-packages/sphinxit/core/processor.py" in ask
  287. return self.connector.execute(query_batch)
File "/usr/local/lib/python2.7/dist-packages/sphinxit/core/connector.py" in execute
  149. raise SphinxQLDriverException(e)

Exception Type: SphinxQLDriverException at /search/打算/
Exception Value: 'utf8' codec can't decode byte 0xe6 in position 0: unexpected end of data
views.py:
# coding: utf8
"""Django views: landing page, hash detail page, search listing, redirects.

This is Python 2 code (``urllib.urlencode``, ``reload(sys)``).
"""
import re
import datetime
import sys
import urllib

from django.http import Http404
from django.views.decorators.cache import cache_page
from django.shortcuts import render, redirect

from lib import politics
import workers.metautils
from search.models import RecKeywords, Hash

# Python 2 hack kept as a module-level side effect for compatibility:
# implicit str <-> unicode conversions then use UTF-8 process-wide.
reload(sys)
sys.setdefaultencoding('utf-8')

# Punctuation (CJK and ASCII) replaced by spaces before splitting text
# into keywords.
# NOTE(review): a bare ASCII ``?`` alternative does not compile and a bare
# ``(|)`` matches the empty string, so the duplicated ASCII entries are
# presumably full-width forms lost in transcoding; they are written here as
# full-width characters -- confirm against the original file.
re_punctuations = re.compile(
    u"。|,|,|!|…|!|《|》|<|>|\"|'|:|:|?|\?|、|\||“|”|‘|’|;|—|(|)|·|\(|\)| |\.|【|】|『|』|@|&|%|\^|\*|\+|\||<|>|~|`|\[|\]")


def _magnet_url(info_hash, name):
    """Return a magnet URI for ``info_hash`` with ``name`` as display name."""
    return 'magnet:?xt=urn:btih:' + info_hash + \
        '&' + urllib.urlencode({'dn': name.encode('utf8')})


@cache_page(600)
def index(request):
    """Render ``index.html`` with RecKeywords ordered by descending ``order``."""
    reclist = RecKeywords.objects.order_by('-order')
    return render(request, 'index.html', {'reclist': reclist})


@cache_page(3600 * 24)
def hash(request, h):
    """Render ``info.html`` for the record looked up by ``h``.

    Raises:
        Http404: if the lookup raises or returns no rows.
    """
    try:
        res = Hash.objects.list_with_files([h])
        j = res[0]
    except Exception as exc:
        # Any lookup failure is reported as "not found"; ``Exception`` (not a
        # bare except) lets SystemExit/KeyboardInterrupt propagate.
        raise Http404(exc)

    d = {'info': j}
    d['keywords'] = list(
        set(re_punctuations.sub(u' ', d['info']['name']).split()))
    if 'files' in d['info']:
        # Drop entries whose path starts with '_', largest file first.
        d['info']['files'] = [
            y for y in d['info']['files'] if not y['path'].startswith(u'_')]
        d['info']['files'].sort(key=lambda f: f['length'], reverse=True)
    d['magnet_url'] = _magnet_url(d['info']['info_hash'], d['info']['name'])
    d['download_url'] = 'http://www.haosou.com/s?' + \
        urllib.urlencode(
            {'ie': 'utf-8', 'src': 'ssbc', 'q': d['info']['name'].encode('utf8')})
    return render(request, 'info.html', d)


@cache_page(1800)
def search(request, keyword=None, p=None):
    """Render ``list.html`` with one page of search results for ``keyword``.

    Args:
        request: the Django request; ``p``, ``c`` and ``s`` are read from
            ``request.GET`` (page, category, sort field).
        keyword: the search phrase; an empty value redirects to ``/``.
        p: page number; falls back to ``request.GET['p']``, then to 1.

    Returns:
        A redirect for empty or sensitive keywords, otherwise the rendered
        result list.
    """
    if not keyword:
        return redirect('/')
    if politics.is_sensitive(keyword):
        return redirect(
            '/?' + urllib.urlencode({'notallow': keyword.encode('utf8')}))

    d = {'keyword': keyword}
    d['words'] = list(set(re_punctuations.sub(u' ', d['keyword']).split()))
    try:
        d['p'] = int(p or request.GET.get('p'))
    except (TypeError, ValueError):
        # Missing (None) or non-numeric page number.
        d['p'] = 1
    if d['p'] < 1:
        # A page below 1 would produce a negative offset.
        d['p'] = 1
    d['category'] = request.GET.get('c', '')
    d['sort'] = request.GET.get('s', 'create_time')
    d['ps'] = 10
    d['offset'] = d['ps'] * (d['p'] - 1)

    # Decode only real byte strings; calling .decode() on unicode makes
    # Python 2 encode it with the default codec first.
    query = keyword.decode('utf-8') if isinstance(keyword, bytes) else keyword
    res = Hash.objects.search(
        query, d['offset'], d['ps'], d['category'], d['sort'])
    # ``res`` must provide the 'result' and 'cats' entries read below.
    d.update(res)

    # Fill info
    # NOTE(review): nesting below was reconstructed from a whitespace-collapsed
    # paste; per-item enrichment is assumed to apply only to matched ids.
    ids = [str(x['id']) for x in d['result']['items']]
    if ids:
        details = dict(
            (y['id'], y) for y in Hash.objects.list_with_files(ids))
        for x in d['result']['items']:
            y = details.get(x['id'])
            if y is None:
                continue
            x.update(y)
            x['magnet_url'] = _magnet_url(x['info_hash'], x['name'])
            x['maybe_fake'] = x['name'].endswith(
                u'.rar') or u'BTtiantang.com' in x['name'] or u'liangzijie' in x['name']
            if 'files' in x:
                # First five visible files, then ordered by size.
                x['files'] = [
                    z for z in x['files'] if not z['path'].startswith(u'_')][:5]
                x['files'].sort(key=lambda f: f['length'], reverse=True)
            else:
                x['files'] = [
                    {'path': x['name'], 'length': x['length']}]

    # pagination
    w = 10
    total = int(d['result']['meta']['total_found'])
    # Ceiling division: number of pages needed for ``total`` results.
    d['page_max'] = (total + d['ps'] - 1) // d['ps']
    d['prev_pages'] = range(
        max(d['p'] - w + min(w // 2, d['page_max'] - d['p']), 1), d['p'])
    d['next_pages'] = range(
        d['p'] + 1, int(min(d['page_max'] + 1, max(d['p'] - w // 2, 1) + w)))

    # NOTE(review): 'relavance' spelling is kept as-is; presumably the search
    # backend expects this exact value -- confirm before changing.
    d['sort_navs'] = [
        {'name': '按收录时间', 'value': 'create_time'},
        {'name': '按文件大小', 'value': 'length'},
        {'name': '按相关性', 'value': 'relavance'},
    ]
    d['cats_navs'] = [{'name': '全部', 'num': total, 'value': ''}]
    for x in d['cats']['items']:
        v = workers.metautils.get_label_by_crc32(x['category'])
        d['cats_navs'].append(
            {'value': v, 'name': workers.metautils.get_label(v), 'num': x['num']})
    return render(request, 'list.html', d)


def hash_old(request, h):
    """Permanently redirect to ``/hash/<h>``."""
    return redirect('/hash/' + h, permanent=True)


def search_old(request, kw, p):
    """Redirect to the 'list' target with ``kw`` and ``p``."""
    return redirect('list', kw, p)


@cache_page(3600 * 24)
def howto(request):
    """Render the static ``howto.html`` page."""
    return render(request, 'howto.html', {})
编橙之家文章,
相关内容
- peewee postgres ArrayField 和JSONField怎么update数据,peeweepos
- 线上环境部署Django,线上部署django,请问大家是如何在线
- windows下python控制台输出中文乱码问题?,,在做爬虫的
- web2py db.define_table(),,中的format关键字参
- 自己写的Flask mysql操作类出错?,flaskmysql,操作类如下#
- 两个 js 加密函数写成 python 怎么写,jspython,function aes
- python爬虫正则表达式问题,python爬虫,scrapy爬虫,里面用
- Python通过SMTP发送邮件总是验证失败。,pythonsmtp,以下是
- flask问题。,flask问题,http://www.p
- python正则表达式怎么匹配这段中文?,,下面这段文字,
评论关闭