When a large amount of text needs to be translated, doing it by hand is too slow, so the translation has to be done in batches by a program.
1. Use 360's translation
def fanyi_word_cn(string):
    """Translate English text through the 360 translation endpoint.

    Args:
        string: The English text to be translated.

    Returns:
        The value stored under ``["data"]["fanyi"]`` in the JSON reply.
    """
    # Imported locally so that this snippet is runnable on its own.
    import json
    from urllib import parse, request

    url = "https://fanyi.so.com/index/search"
    form_data = {
        'query': string,  # the English text to be translated
        'eng': '1',
    }
    # The server does not accept a dictionary, only strings/bytes, so the
    # form is url-encoded and then encoded to bytes.
    data = parse.urlencode(form_data).encode('utf-8')
    # Send the request and make sure the connection is closed afterwards.
    with request.urlopen(url, data) as response:
        html = response.read().decode("utf-8")
    # The reply is JSON; json.loads turns it into a Python dictionary.
    result = json.loads(html)
    # The translation sits under the key "fanyi" inside the key "data".
    return result["data"]["fanyi"]
2. Use Google's own API to translate
Note that you need to install the `translate` package first (the code imports it with `from translate import Translator`).
pip install translate
# google api, per 1000 words everyday
def translate_cn_api(content):
    """Translate ``content`` into Chinese with the ``Translator`` class.

    Args:
        content: The text to translate.

    Returns:
        Whatever ``Translator(to_lang="zh").translate(content)`` returns.
    """
    translator = Translator(to_lang="zh")
    return translator.translate(content)
**3. Call Google Translate directly instead, because the API used above has a size limit and can only translate 1000 words per day.**
Remarks: Environment preparation
3.1 java environment
3.2 Install execjs module
pip install PyExecJS
3.3 Two implementation modules
HandleJs.py
# coding=utf-8
import execjs


class Py4Js():
    """Wrap a compiled JavaScript context that computes the ``tk`` token.

    The JavaScript defines two functions: ``TL(a)``, which turns the input
    text into a token string, and its helper ``RL(a, b)``.
    """

    def __init__(self):
        # NOTE(review): the comparison and shift operators (>, >>, >>>, >=)
        # were missing from the published listing and have been restored
        # here -- confirm against a known-good copy of this routine.
        self.ctx = execjs.compile("""
        function TL(a) {
            var k = "";
            var b = 406644;
            var b1 = 3293161072;
            var jd = ".";
            var $b = "+-a^+6";
            var Zb = "+-3^+b+-f";
            for (var e = [], f = 0, g = 0; g < a.length; g++) {
                var m = a.charCodeAt(g);
                128 > m ? e[f++] = m : (2048 > m ? e[f++] = m >> 6 | 192 : (55296 == (m & 64512) && g + 1 < a.length && 56320 == (a.charCodeAt(g + 1) & 64512) ? (m = 65536 + ((m & 1023) << 10) + (a.charCodeAt(++g) & 1023),
                e[f++] = m >> 18 | 240,
                e[f++] = m >> 12 & 63 | 128) : e[f++] = m >> 12 | 224,
                e[f++] = m >> 6 & 63 | 128),
                e[f++] = m & 63 | 128)
            }
            a = b;
            for (f = 0; f < e.length; f++) a += e[f],
            a = RL(a, $b);
            a = RL(a, Zb);
            a ^= b1 || 0;
            0 > a && (a = (a & 2147483647) + 2147483648);
            a %= 1E6;
            return a.toString() + jd + (a ^ b)
        };
        function RL(a, b) {
            var t = "a";
            var Yb = "+";
            for (var c = 0; c < b.length - 2; c += 3) {
                var d = b.charAt(c + 2),
                d = d >= t ? d.charCodeAt(0) - 87 : Number(d),
                d = b.charAt(c + 1) == Yb ? a >>> d : a << d;
                a = b.charAt(c) == Yb ? a + d & 4294967295 : a ^ d
            }
            return a
        }
        """)

    def getTk(self, text):
        """Return the result of calling the JavaScript ``TL`` on ``text``."""
        return self.ctx.call("TL", text)
main.py
# coding=utf-8
# import urllib.request
import urllib2
from HandleJs import Py4Js
from translate import Translator
import requests
# Example:find_last('aaaa','a') returns 3
# Make sure your procedure has a return statement.
def find_last(string, str):
    """Return the index of the last occurrence of ``str`` in ``string``.

    Args:
        string: The text to search in.
        str: The substring to look for (the name shadows the builtin, but it
            is kept so that keyword callers continue to work).

    Returns:
        The highest index at which ``str`` starts, or -1 if it never occurs.
    """
    # str.rfind gives the same answer as repeatedly calling find() from the
    # position after the previous hit until nothing more is found.
    return string.rfind(str)
def open_url(url):
    """Fetch ``url`` with a browser-like User-Agent and return the body.

    Args:
        url: The address to request.

    Returns:
        The response body decoded as UTF-8.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0'}
    req = urllib2.Request(url=url, headers=headers)
    response = urllib2.urlopen(req)
    try:
        return response.read().decode('utf-8')
    finally:
        # Always release the connection, even if reading or decoding fails.
        response.close()
def translate_core(content, tk, language):
    """Request a translation into Chinese and extract the translated text.

    Args:
        content: The text to translate; at most 4891 characters.
        tk: The token sent as the ``tk`` query parameter.
        language: ``'de'`` selects German as the source language; any other
            value is treated as English.

    Returns:
        The translated text, or None when ``content`` is too long or when a
        short reply does not contain the expected closing marker.
    """
    if len(content) > 4891:
        print("too long byte 4891")
        return None

    content = urllib2.quote(content)
    source_lang = 'de' if language == 'de' else 'en'
    url = ("http://translate.google.cn/translate_a/single?client=t"
           "&sl=%s&tl=zh-CN&hl=zh-CN&dt=at&dt=bd&dt=ex&dt=ld&dt=md&dt=qca"
           "&dt=rw&dt=rm&dt=ss&dt=t&ie=UTF-8&oe=UTF-8&clearbtn=1&otf=1&pc=1"
           "&srcrom=0&ssel=0&tsel=0&kc=2&tk=%s&q=%s"
           % (source_lang, tk, content))

    # The reply is JSON-formatted text; it is picked apart as a plain string.
    result = open_url(url)

    # NOTE(review): the length tested here is that of the URL-quoted text,
    # because ``content`` was reassigned above -- confirm this is intended.
    if len(content) < 10:
        end = result.find("\",")
        if end > 4:
            return result[4:end]
        return None

    # Keep only the part before the source-language marker, then strip the
    # outer brackets so the remainder can be split into segments.
    marker = ',null,"%s",null,null,' % source_lang
    result_all = result.split(marker)[0].replace('[[', '').replace(']]', ']')[1:]

    # Parse and splice the Chinese fields; the final segment is skipped.
    pieces = []
    for segment in result_all.split('],[')[:-1]:
        end = segment.find("\",")
        pieces.append(segment[1:end])
    return ''.join(pieces)
def translate_normal(content, language):
    """Translate one piece of text that fits into a single request.

    Args:
        content: The text to translate.
        language: Source-language code passed on to ``translate_core``.

    Returns:
        Whatever ``translate_core`` returns for this text.
    """
    js = Py4Js()
    # The request needs a token computed from the text itself.
    tk = js.getTk(content)
    return translate_core(content, tk, language)
def translate_cn(content, language):
    """Translate text of any length by splitting it into chunks.

    Text longer than 4891 characters is cut into chunks of at most that
    length. Each cut is made after the last '.' in the chunk, or after the
    last space if there is no '.', or at the limit itself if there is
    neither. The translated chunks are concatenated.

    Args:
        content: The text to translate.
        language: Source-language code passed on to ``translate_normal``.

    Returns:
        The translated text.
    """
    LEN_LIMIT = 4891
    print('en:' + content)
    if len(content) <= LEN_LIMIT:
        return translate_normal(content, language)

    content_cn = ''
    while True:
        content_limit = content[0:LEN_LIMIT]
        # Prefer to cut right after a sentence end, then after a space.
        limit_end = find_last(content_limit, '.') + 1
        if limit_end == 0:
            limit_end = find_last(content_limit, ' ') + 1
            if limit_end == 0:
                limit_end = LEN_LIMIT
        content_en = content[0:limit_end]
        if content_en == '':
            break
        content_cn = content_cn + translate_normal(content_en, language)
        content = content[limit_end:]
    return content_cn
# google api, per 1000 words everyday
def translate_cn_api(content):
    """Translate ``content`` into Chinese with the ``Translator`` class.

    Args:
        content: The text to translate.

    Returns:
        Whatever ``Translator(to_lang="zh").translate(content)`` returns.
    """
    translator = Translator(to_lang="zh")
    return translator.translate(content)
if __name__ == "__main__":
    # Sample inputs. Each assignment replaces the previous one, so only the
    # last (English) sample is actually translated below.
    content = """Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Namespaces are one honking great idea --let's do more of those!"""
    #
    content = """
IT-Grundschutz M5.131: Absicherung von IP-Protokollen unter Windows Server 2003."""
    # content ='High'
    content = """Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Namespaces are one honking great idea --let's do more of those!"""
    language = 'en'
    # Newlines are removed before the text is sent for translation.
    test = translate_cn(content.replace('\n', ''), language)
    print('ok:' + test)
# content ='Checks version'
The code above implements translation from German into Chinese and from English into Chinese.
The above is the whole content of this article, I hope it will be helpful to everyone's study.
Recommended Posts