mirror of https://github.com/fxsjy/jieba.git

7 changed files with 130 additions and 14 deletions
-
8 jieba/__init__.py
-
79 jieba/posseg/__init__.py
-
13 jieba/posseg/viterbi.py
-
2 setup.py
-
2 test/test.txt
-
20 test/test_file.py
-
20 test/test_pos_file.py
@ -0,0 +1,2 @@ |
|||
西三旗硅谷先锋小区半地下室出租,便宜可合租硅谷 |
|||
工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作 |
@ -0,0 +1,20 @@ |
|||
import urllib2 |
|||
import sys,time |
|||
import sys |
|||
sys.path.append("../") |
|||
import jieba |
|||
|
|||
url = sys.argv[1] |
|||
content = open(url,"rb").read() |
|||
t1 = time.time() |
|||
words = list(jieba.cut(content)) |
|||
|
|||
t2 = time.time() |
|||
tm_cost = t2-t1 |
|||
|
|||
log_f = open("1.log","wb") |
|||
for w in words: |
|||
print >> log_f, w.encode("gbk"), "/" , |
|||
|
|||
print 'speed' , len(content)/tm_cost, " bytes/second" |
|||
|
@ -0,0 +1,20 @@ |
|||
import urllib2 |
|||
import sys,time |
|||
import sys |
|||
sys.path.append("../") |
|||
import jieba.posseg as pseg |
|||
|
|||
url = sys.argv[1] |
|||
content = open(url,"rb").read() |
|||
t1 = time.time() |
|||
words = list(pseg.cut(content)) |
|||
|
|||
t2 = time.time() |
|||
tm_cost = t2-t1 |
|||
|
|||
log_f = open("1.log","wb") |
|||
for w in words: |
|||
print >> log_f, w.encode("gbk"), "/" , |
|||
|
|||
print 'speed' , len(content)/tm_cost, " bytes/second" |
|||
|
Write
Preview
Loading…
Cancel
Save
Reference in new issue