TA的每日心情 | 擦汗 2021-11-17 09:18 |
---|
签到天数: 79 天 [LV.6]常住居民II
|
发表于 2021-10-6 09:52:48
|
显示全部楼层
(3) 在哪里加入 - current_path = os.path.dirname(__file__); current_path + "/OALD4_azure.txt"
复制代码
我的版本是 3.9.7，目前没有遇到 No such file or directory 报错。然后是 genMDX_ox4.py 文件有部分中文乱码：
4 N; a, D c& |/ U' E9 e- # -*- coding: utf-8 -*-
: V) p. W" C9 m; c) {# h - # encoding=utf8
, E# o- A0 y" _( X% x2 a
$ O3 g6 q/ t/ E# o1 V- from __future__ import unicode_literals,print_function, absolute_import, division* ?6 C+ s' `& A( s% s
- % {4 }" z' y+ I5 q
$ h6 C. Z& [6 L- import re3 Z/ R) }- t& ?5 I
- import copy
" o6 ~* D! M) G+ F9 c) X- a; S3 z2 x9 \ - import chardet
' n4 o9 }% G1 G# N u - 3 f: V, H+ W) s7 l8 E; K* ?
- import os
1 N( q: N9 P7 x - import io1 \( }! Z; ]) f2 y; D1 f3 f( n: K* n
- import sys+ d0 N: l0 K5 z0 A1 O/ r
- # reload(sys)6 P8 E) w x1 {8 `5 {' i1 V) H
- # sys.setdefaultencoding('utf-8')
, l- T$ j( q' W6 P8 G' c+ x3 G+ R# c
) g8 E/ z* Y8 A3 [) l8 M! r1 |- import collections+ b4 j0 H5 o- w- v$ B
- from collections import defaultdict
& a) h z( s; w! c! l) o
2 x9 ^5 F. d& x* F- $ |, u# e# F& S8 B6 D
- from writemdict import MDictWriter, encrypt_key; i) O3 s \- Y+ [
- from ripemd128 import ripemd128 `( X8 Q3 s6 w8 h' c; G; _9 {1 p
7 {# B$ N; r0 _' Y; f* s8 V7 J7 _- 7 C7 O* ~) w3 o/ e! z! E' q% s
# Input/output locations, resolved against the current working directory.
# (An earlier revision read 'oxford2_original.txt' instead.)
SOURCE_PATH = 'OALD4_azure.txt'
ABOUT_PATH = 'about_OX4.txt'  # the "about" text file must be saved as UTF-8
OUTPUT_PATH = "output_ox4/OALD4_Ex.mdx"

# The original literal was mojibake (GBK bytes that had been decoded as
# UTF-8); the surviving byte pattern decodes to the title below.
# NOTE(review): confirm against the original, un-garbled script.
TITLE = "牛津高阶双解(第四版)"

# A line consisting solely of this marker terminates an entry.
ENTRY_END = '</>'


def parse_entries(lines):
    """Group entry lines into a ``headword -> [definition, ...]`` mapping.

    The expected layout is a headword line, then any number of definition
    lines, then a terminator line ``</>``.  The definition lines of one entry
    are concatenated without a separator; a headword that occurs several
    times collects one list item per occurrence.

    Everything before the first terminator line is ignored, and a trailing
    entry with no terminator is dropped -- both exactly as the original
    script did.
    NOTE(review): if the input does not begin with a ``</>`` line, the first
    entry is therefore skipped -- confirm that this is intended.

    Args:
        lines: iterable of text lines; a trailing newline on each line is
            stripped before the line is interpreted.

    Returns:
        A ``defaultdict(list)`` mapping each headword to its definitions.
    """
    entries = defaultdict(list)
    # 0: nothing seen yet, 1: next line is a headword, 2: inside a definition
    state = 0
    word = None
    body = []
    for raw in lines:
        line = raw.rstrip('\n')  # drop the trailing newline
        if line == ENTRY_END:
            if state == 2:
                entries[word].append("".join(body))
            state = 1
            body = []
        elif state == 1:
            word = line
            state = 2
        elif state == 2:
            body.append(line)
    return entries


def main():
    """Build the MDX file from the entry source and the "about" text."""
    with io.open(SOURCE_PATH, 'r', encoding='utf-8') as source:
        entries = parse_entries(source)

    with io.open(ABOUT_PATH, 'r', encoding='utf-8') as about_file:
        about = about_file.read()

    # Build the writer before opening the output so that a failure here does
    # not leave a truncated .mdx file behind.
    writer = MDictWriter(entries, TITLE, about)
    with open(OUTPUT_PATH, "wb") as outfile:
        writer.write(outfile)


if __name__ == "__main__":
    main()
复制代码
是否可以看看你的文档呢？乱码的部分中文写的是什么？ |
|