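#### Count how many times each word occurs in a text file
#### A word is simply a run of non-whitespace characters delimited by whitespace
#### (leading and trailing punctuation is then stripped from each such run)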

def text_stat(infname, statfile):
    """Count word occurrences in a text file and write the tally to a file.

    Each line of ``infname`` is split on whitespace, and every resulting
    token has leading and trailing punctuation stripped. Tokens that become
    empty after stripping (pure punctuation such as ``---``) are ignored.
    Matching is case-sensitive, so ``Hello`` and ``hello`` are different
    words.

    The tally is written to ``statfile`` as one ``word, count`` line per
    distinct word, ordered by ``sorted()`` on the words.

    Args:
        infname: Path of the text file to read.
        statfile: Path of the report file to write; existing content is
            overwritten.

    Returns:
        A tuple ``(total_words, distinct_words)``.

    Raises:
        OSError: If either file cannot be opened.
    """
    # Punctuation removed from both ends of every token (never from inside).
    strip_chars = ",.:';!?()-_$/`~\"\\"
    worddict = {}
    num = 0

    # ``with`` releases the handle even if reading fails part-way through
    # (for example on a decoding error), which a manual close() would not.
    with open(infname) as textfile:
        for line in textfile:
            for token in line.split():
                word = token.strip(strip_chars)
                if not word:
                    continue
                worddict[word] = worddict.get(word, 0) + 1
                num += 1

    # The report is only opened after the input was read successfully, so a
    # missing input file never leaves an empty report behind.
    with open(statfile, "w") as outfile:
        outfile.writelines(
            word + ", " + str(worddict[word]) + "\n"
            for word in sorted(worddict)
        )

    return num, len(worddict)
#### end of text_stat

if __name__ == "__main__":
    # Demo run: tally the words of sun1.txt into sun-data.txt, then report
    # the total and distinct word counts on standard output.
    counts = text_stat("sun1.txt", "sun-data.txt")
    labels = ("words in the text.", "different words in the text.")
    for amount, label in zip(counts, labels):
        print(amount, label)
    print("Finished, and the result is in sun-data.txt.")


#### Word cleaning: remove irrelevant characters -- define this helper yourself
## word = clarify(word)
