import codecs
import os
import sys

try:
    import chardet
    from chardet.universaldetector import UniversalDetector
except ImportError:
    # chardet is only used as a secondary hint in convert_to_utf8(); the
    # byte-order mark is checked first, so the script still works without it.
    chardet = None
    UniversalDetector = None

#############################################
## Author: John Jiang
## Version: 0.9
## License: To be determined
#############################################
## Usage
#############################################
## This should only be used for versions of
## Messenger Plus before June 2005
## Place script in a directory containing
## all the chatlogs and run it.
#############################################
## Example Email
#############################################
## From - Sat Apr 04 20:06:05 2009
## Date: Sat, 4 Apr 2009 23:29:32 +1000
## From: "John Meh"
## To: johnjiang88@hotmail.com
## Subject: Lol
## MIME-Version: 1.0
## Content-Type: TEXT/HTML; charset=utf-8
## Status: O
## X-Status:
##
##
## rofl rofl
#############################################

## When True, main() prints the converted emails instead of writing them.
debug = False

## Full weekday name -> three-letter abbreviation used in the mail headers.
days = {
    "Sunday": "Sun",
    "Monday": "Mon",
    "Tuesday": "Tue",
    "Wednesday": "Wed",
    "Thursday": "Thu",
    "Friday": "Fri",
    "Saturday": "Sat",
}

## Full month name -> three-letter abbreviation used in the mail headers.
months = {
    "January": "Jan",
    "February": "Feb",
    "March": "Mar",
    "April": "Apr",
    "May": "May",
    "June": "Jun",
    "July": "Jul",
    "August": "Aug",
    "September": "Sep",
    "October": "Oct",
    "November": "Nov",
    "December": "Dec",
}


def convert_to_utf8(filename):
    """Read *filename* and return its contents as decoded (unicode) text.

    Despite the name, no UTF-8 bytes are produced here; the result is a text
    object that main() later writes out through a UTF-8 codec.

    The file is decoded as UTF-16LE when it starts with the UTF-16
    little-endian byte-order mark, or when chardet (if installed) reports
    "UTF-16LE".  Anything else is decoded as windows-1252.  The BOM is
    checked directly because chardet releases differ in the name they report
    for BOM-prefixed files.

    A leading BOM is kept in the returned text (as U+FEFF).

    Raises:
        IOError/OSError: if the file cannot be read.
        UnicodeDecodeError: if the bytes are invalid for the chosen codec.
    """
    with open(filename, "rb") as handle:
        raw = handle.read()

    # The UTF-32LE BOM begins with the same two bytes, so rule it out.
    is_utf16le = (raw.startswith(codecs.BOM_UTF16_LE)
                  and not raw.startswith(codecs.BOM_UTF32_LE))
    if not is_utf16le and chardet is not None:
        is_utf16le = chardet.detect(raw)["encoding"] == "UTF-16LE"

    return raw.decode("UTF-16LE" if is_utf16le else "windows-1252")


## Returns a list in the format [Weekday, Day, Month, Year]
def parse_date(line):
    """Extract the date from a session header line.

    Commas are removed and the line is split on whitespace; words 3..6 are
    taken as full weekday name, day of month, full month name and year
    (e.g. "| Session Start: Saturday, 4 April 2009 |" -- the exact log
    layout is assumed from how the fields are indexed).

    Returns:
        [weekday_abbr, day, month_abbr, year] as strings.

    Raises:
        KeyError: if the weekday or month name is not recognised.
        IndexError: if the line has too few words.
    """
    date = line.replace(",", "").split()[3:7]
    date[0] = days[date[0]]
    date[2] = months[date[2]]
    return date


##Returns 24hr time string
def parse_time(line):
    """Convert the 12-hour timestamp at the start of a chat line to 24-hour.

    Characters 1..11 of *line* are expected to hold "hh:mm:ss AM" or
    "hh:mm:ss PM" (character 0 being an opening delimiter such as "[").

    Returns:
        The time as "HH:MM:SS" with a zero-padded 24-hour hour.  If no AM/PM
        marker is found the time portion is returned unchanged.

    Raises:
        IndexError: if the sliced text contains no space-separated marker.
    """
    stamp = line[1:12].split(" ")
    parts = stamp[0].split(":")
    # With a one-digit hour the slice also picks up the closing delimiter
    # ("PM]"), so only the first two characters identify the marker.
    meridiem = stamp[1][:2]
    if meridiem in ("AM", "PM"):
        # 12 AM is hour 0 and 12 PM is hour 12, hence the modulo.
        hour = int(parts[0]) % 12
        if meridiem == "PM":
            hour += 12
        parts[0] = "%02d" % hour
    return ":".join(parts)


##Converts each file into separate emails and returns the result
def parse_txt(path, tz_offset="+1000"):
    """Convert one chat log file into a string of mbox-style emails.

    The log is split into sections at every line starting with ".---".
    Sections are consumed in pairs: a session header (date on its first
    line, first participant's address in parentheses on its third line)
    followed by the chat text, whose first line supplies the time.

    Args:
        path: Path of the log file.  The contact address used in the
            From/Subject headers is the file name with ".txt", ".old",
            " (2)" and " (3)" removed.
        tz_offset: Time-zone offset written into each Date header.

    Returns:
        All generated emails concatenated; "" if the file has no complete
        header/chat pair.  A trailing header with no chat section and any
        text before the first delimiter are ignored.
    """
    print(path)
    lines = convert_to_utf8(path).split("\n")

    # Accept both "\\" and "/" separators regardless of the host platform.
    email = path.replace("\\", "/").split("/")[-1]
    for noise in (".txt", ".old", " (2)", " (3)"):
        email = email.replace(noise, "")

    # Force the first line to begin with "." by dropping its first two
    # characters -- presumably BOM residue plus the leading dot; kept as in
    # the original so the first delimiter line is always recognised.
    lines[0] = u"." + lines[0][2:]

    sections = []
    for line in lines:
        if line.startswith(u".---"):
            sections.append([])
        elif sections:
            sections[-1].append(line + "\n")
    sections = ["".join(chunk) for chunk in sections]

    messages = []
    # Stop before a dangling header that has no chat section after it.
    for i in range(0, len(sections) - 1, 2):
        info_lines = sections[i].split("\n")
        chat = sections[i + 1]

        date = parse_date(info_lines[0])
        time = parse_time(chat.split("\n")[0])
        # Address of the first listed participant: the text inside "(...)".
        to = info_lines[2].split(")")[0].split("(")[1]

        header = " ".join(
            ["From -", date[0], date[2], date[1], time, date[3], "\n"])
        header += " ".join(
            ["Date:", date[0] + ",", date[1], date[2], date[3], time,
             tz_offset, "\n"])
        header += (
            "From: %s\n"
            "To: %s\n"
            "Subject: Chat with %s\n"
            "MIME-Version: 1.0\n"
            "Content-Type: TEXT/PLAIN; charset=utf-8\n"
            "Status: O\n"
            "X-Status:\n\n"
        ) % (email, to, email)
        messages.append(header + chat)

    return "".join(messages)


def main():
    """Convert every ".txt" log under the current directory.

    The result is written to a UTF-8 file named "chatlogs" in the current
    directory, or printed instead when the module-level ``debug`` flag is set.
    """
    path = "."
    # The context manager closes the output even if a log fails to parse.
    with codecs.open("chatlogs", "w", "utf-8") as output:
        for root, dirs, files in os.walk(path):
            for name in files:
                if not name.endswith(".txt"):
                    continue
                convo = parse_txt(os.path.join(root, name))
                if debug:
                    print(convo)
                else:
                    output.write(convo)


if __name__ == "__main__":
    main()