import json
import re
import urllib
import urllib.parse
from urllib import request
# The Youdao web translator submits its query as an AJAX POST request.
# The browser posts to
# http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule
# but when scraping, the path has to be changed from translate_o to translate.
# The exact reason is not understood yet and still needs to be looked into.
url = "http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule"
# Pretend to be a browser (User-Agent string taken from the Sogou browser).
header = {"User-Agent":
"Mozilla/5.0 (windows NT 10.0; WOW64) AppleWEBKit/537.36 (Khtml, like Gecko) Chrome/58.0.3029.110 Safari/537.36 SE 2.X MetaSr 1.0"
}
# Regex used as a fallback to pull the translated text out of a reply that
# cannot be parsed as JSON.
pattern = r'"tgt":"(.*?)"}'


def build_form_data(key):
    """Return the POST form fields used to translate *key*.

    Every field except ``"i"`` is a fixed value copied from a request that a
    browser sent to the site.

    NOTE(review): ``salt``, ``sign`` and ``ts`` are static values from that one
    captured request; presumably the ``translate_o`` endpoint validates them,
    which would explain why only ``translate`` works here -- confirm.
    """
    return {
        "i": key,
        "from": "AUTO",
        "to": "AUTO",
        "smartresult": "dict",
        "client": "fanyideskweb",
        "salt": "15536694414373",
        "sign": "cddcc6c6092b57a04c546a2e075e2218",
        "ts": "1553669441437",
        "bv": "f355c521b6e13c15aa35c72a097b7786",
        "doctype": "JSON",
        "version": "2.1",
        "keyfrom": "fanyi.web",
        "action": "FY_BY_REALTlME",
        "typoResult": "false",
    }


def _collect_tgt(node, found):
    """Append every string stored under a ``"tgt"`` key inside *node* to *found*.

    *node* is a value produced by ``json.loads``; dicts and lists are walked
    recursively in document order, anything else is ignored.
    """
    if isinstance(node, dict):
        for name, value in node.items():
            if name == "tgt" and isinstance(value, str):
                found.append(value)
            else:
                _collect_tgt(value, found)
    elif isinstance(node, list):
        for item in node:
            _collect_tgt(item, found)


def extract_translations(result):
    """Return the list of translated strings contained in the reply *result*.

    The reply is parsed as JSON so that escape sequences in the translated
    text are decoded and the position of ``"tgt"`` inside its object does not
    matter. If *result* is not valid JSON, the regex in ``pattern`` is applied
    to the raw text instead.
    """
    try:
        payload = json.loads(result)
    except ValueError:
        return re.findall(pattern, result)
    found = []
    _collect_tgt(payload, found)
    return found


def fetch_translation(key, timeout=10):
    """POST *key* to ``url`` and return the decoded response body.

    *timeout* is the number of seconds to wait on the network before
    ``urlopen`` gives up, so a stalled connection cannot hang the script.
    Errors raised by ``urlopen`` are not caught here.
    """
    data = urllib.parse.urlencode(build_form_data(key)).encode(encoding="utf-8")
    # Supplying ``data`` is what turns the request into a POST.
    req = request.Request(url, data=data, headers=header)
    # The context manager closes the HTTP response even if reading fails.
    with request.urlopen(req, timeout=timeout) as response:
        return response.read().decode("utf-8")


def main():
    """Prompt for a word, then print the raw reply and the extracted translations."""
    key = input("请输入要翻译的词语:")
    result = fetch_translation(key)
    print(result)
    print(extract_translations(result))


if __name__ == "__main__":
    main()