러시아어 웹 사이트 (키릴 문자)를 구문 분석하고 mySQL DB에 데이터를 삽입하려고합니다. 파싱은 괜찮지 만 키릴 문자 때문에 DB에 데이터를 저장할 수 없습니다. 파이썬은 나에게이 오류를 준다 :
Traceback (most recent call last):
File "/Users/kr/PycharmProjects/education_py/vape_map.py", line 40, in <module>
print parse_shop_meta()
File "/Users/kr/PycharmProjects/education_py/vape_map.py", line 35, in parse_shop_meta
VALUES (%s, %s, %s, %s)""",(shop_title, shop_address, shop_phone, shop_site, shop_desc))
File "/Library/Python/2.7/site-packages/MySQLdb/cursors.py", line 210, in execute
query = query % args
TypeError: not all arguments converted during string formatting
내 코드 :
# -- coding: utf-8 --
import requests
from lxml.html import fromstring
import csv
import MySQLdb
db = MySQLdb.connect(host="localhost", user="root", passwd="***", db="vape_map", charset='utf8')
def get_shop_urls():
i = 1
all_shop_urls = []
while i < 2:
url = requests.get("http://vapemap.ru/shop/?city=%D0%9C%D0%BE%D1%81%D0%BA%D0%B2%D0%B0&page={}".format(i))
page_html = fromstring(url.content)
shop_urls = page_html.xpath('//h3[@class="title"]/a/@href')
all_shop_urls += shop_urls
i +=1
return all_shop_urls
def parse_shop_meta():
shops_meta = []
csvfile = open('vape_shops.csv', 'wb')
writer = csv.writer(csvfile, quotechar='|', quoting=csv.QUOTE_ALL)
cursor = db.cursor()
for shop in get_shop_urls():
url = requests.get("http://vapemap.ru{}".format(shop), 'utf-8')
page_html = fromstring(url.content)
shop_title = page_html.xpath('//h1[@class="title"]/text()')
shop_address = page_html.xpath('//div[@class="address"]/text()')
shop_phone = page_html.xpath('//div[@class="phone"]/a/text()')
shop_site = page_html.xpath('//div[@class="site"]/a/text()')
shop_desc = page_html.xpath('//div[@class="shop-desc"]/text()')
sql = """INSERT INTO vape_shops(title, address, phone, site, description)
VALUES (%s, %s, %s, %s)""",(shop_title, shop_address, shop_phone, shop_site, shop_desc)
cursor.execute(sql, (shop_title[0], shop_address[0], shop_phone[0], shop_site[0], shop_desc[0]))
db.commit()
db.close()
return shops_meta
print parse_shop_meta()
이 기사는 인터넷에서 수집됩니다. 재 인쇄 할 때 출처를 알려주십시오.
침해가 발생한 경우 연락 주시기 바랍니다[email protected] 삭제
몇 마디 만하겠습니다