TA的每日心情 | 擦汗 2021-11-17 09:18 |
---|
签到天数: 79 天 [LV.6]常住居民II
|
发表于 2021-10-6 09:52:48
|
显示全部楼层
(3) 在哪里加入
- current_path = os.path.dirname(__file__);current_path+"/OALD4_azure.txt"
复制代码
我的版本是 3.9.7，目前没有遇到 No such file or directory 报错。然后是 genMDX_ox4.py 文件有部分中文乱码：
' z; D3 _6 i2 ]$ L, k- # -*- coding: utf-8 -*-
7 M" ^1 Q. O8 Y- s, m4 j - # encoding=utf8. Q3 m! Q: J' G2 t
* e% K$ Z7 G. n) T i) u' m( w" a- from __future__ import unicode_literals,print_function, absolute_import, division+ }9 n1 T. j" k9 r9 y0 @8 \- S/ r
- 5 ^4 b& d% |' v2 ~ [- q
- ( t7 ]. Z. h5 ^
- import re
3 r$ @' d- g: M- s% z j - import copy6 }0 [/ T7 e9 l3 f1 h+ @* h# |
- import chardet
, a" s* l6 I) o& k9 g3 w - % l! g( D% g7 L" m( m" s
- import os1 E2 b! R9 L% D: v: a
- import io
i: T; F, x6 e - import sys
8 c( i$ a7 P/ R - # reload(sys)
6 ?' m/ f: M5 S, W% \# z: D5 r4 X - # sys.setdefaultencoding('utf-8')' @1 f1 a& K& |1 s# B
- " [ j# J7 A' @' p
- import collections1 @ I5 O' A1 ]9 n I9 r
- from collections import defaultdict
% n% z1 w" Q* ~1 S% W
$ ?7 v5 D3 Q s
9 O9 M8 X3 {- h) X' S+ O- from writemdict import MDictWriter, encrypt_key0 ~$ |; U5 E2 i: D* C8 W
- from ripemd128 import ripemd128
: D3 d, k8 R% T0 H6 E+ V
- R0 D& K6 E' G8 L* l
% m. c% H4 Q+ e- R, u/ G/ w- head = 0
0 y0 n6 B& q+ z) B - new_mean =[], b/ G4 b3 o: n* \
- f=io.open('OALD4_azure.txt', 'r',encoding='utf-8')5 s0 b1 n8 j" v* f
- #f=io.open('oxford2_original.txt', 'r',encoding='utf-8')6 e a/ B& F. a- I5 m P0 x# e2 f1 @3 G
- d = defaultdict(list) #����һ�����ֵ䣬Ҳ��ʹ��{}������% ?+ M5 f; n5 e8 B$ C4 v9 s: [! i
- for line in f: #ÿ�δ�f�ж���һ��
: V d8 M) Q* ` w! r6 d - line=line.rstrip('\n')#ȥ����β�Ļ��з�7 \ s& a: L2 q! G: u
- if line == '</>':+ {8 O) Q! l+ p3 _ [4 Z4 c9 m8 L
- if head == 2:
. L; G) o" l6 Z - new_mean[0:] = ["".join(new_mean[0:])]; W) ]1 V3 ?, ]& v! o4 h# X8 h
- d[word].append(new_mean[0])' H& N8 f6 l. w0 P1 y- w
- head = 1;# {, j7 [- z$ _
- new_mean =[]2 v0 Y+ y* a0 X# J' K
- elif head == 1:
, K, p K! J/ P' n) W7 _9 I+ f& j2 _ - word = line% W; k* v3 Q# Y3 `6 o% [/ x( h$ M
- head = 2
9 q+ D' X$ k# ?4 b0 d1 Y - elif head == 2:
# r% w2 T8 b8 F4 h) E - new_mean.append(line). n- p! e8 k) U8 R2 F
- head = 2
/ m; P* P8 x6 X - f.close()" Q( M& b) ]4 D3 x' X3 _7 u( y
/ {0 G# D' h/ \' U" h7 `. Y
1 U* G3 Y: e2 G& `- ff=io.open('about_OX4.txt', 'r',encoding='utf-8')#�ʵ�about��Ϣ��txt�ļ��뱣��Ϊutf-88 ~, s; P/ J, e. D( g
- about=[]
) Q6 s# A) J+ Q5 B" T; Q# z$ } - for line in ff: #ÿ�δ�f�ж���һ��# P' _# b+ ^4 U
- about.append(line)
! j p! ~+ V2 P9 E- \, B - about[0:] = ["".join(about[0:])]
- {) g- J& q, k9 O) j( j( s0 ]5 e
8 [4 H9 D; p$ L1 @( y |
9 V' G4 ?9 Q: d- #outfile = open("example_output/��ţ��Beta_V2.2.1.mdx", "wb"); _- V5 |4 U# Z1 J) U
- #writer = MDictWriter(d, "��ţ��Beta_V2.2.1", about[0])5 W1 b' p) x5 P
- outfile = open("output_ox4/OALD4_Ex.mdx", "wb")
" z; |8 u) q7 p1 J - writer = MDictWriter(d, "ţ��߽�˫��(���İ�)", about[0])# s& I+ k& w, L
- writer.write(outfile)
; }' @9 V3 o K, b/ o: H' F! ~ - outfile.close()* D- g1 u* J, D# t+ W
, @1 H* H! }) Y: \: P. u
复制代码 3 J1 s8 V7 g* H
是否可以看看你的文档呢？乱码的部分中文写的是什么？ |
|