“结巴”中文分词:做最好的 Python 中文分词组件
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

17 lines
548 B

  1. #encoding=utf-8
  2. import sys
  3. sys.path.append("../")
  4. import jieba
  5. seg_list = jieba.cut("我来到北京清华大学",cut_all=True)
  6. print "Full Mode:", "/ ".join(seg_list) #全模式
  7. seg_list = jieba.cut("我来到北京清华大学",cut_all=False)
  8. print "Default Mode:", "/ ".join(seg_list) #默认模式
  9. seg_list = jieba.cut("他来到了网易杭研大厦")
  10. print ", ".join(seg_list)
  11. seg_list = jieba.cut_for_search("小明硕士毕业于中国科学院计算所,后在日本京都大学深造") #搜索引擎模式
  12. print ", ".join(seg_list)