import argparse
import re
def extract_ascii_strings(data, min_length=4):
    """Return every run of printable ASCII bytes found in *data*.

    A run is a maximal sequence of at least *min_length* consecutive bytes
    in the range 0x20-0x7E (space through tilde).

    Args:
        data: Bytes to scan.
        min_length: Minimum number of characters a run must contain to be
            reported. Must be at least 1.

    Returns:
        A list of ``str``, in the order the runs occur in *data*.

    Raises:
        ValueError: If *min_length* is smaller than 1.
    """
    if min_length < 1:
        # ``{0,}`` would match the empty string at almost every offset, and
        # a negative number is not parsed as a repetition count at all, so
        # both would silently produce garbage results.
        raise ValueError("min_length must be at least 1")
    pattern = rb'[\x20-\x7E]{%d,}' % min_length
    # Every matched byte is printable ASCII, so a strict decode cannot fail.
    return [run.decode('ascii') for run in re.findall(pattern, data)]
def extract_unicode_strings(data, min_length=4):
    """Return every run of UTF-16LE encoded printable ASCII text in *data*.

    A run is a maximal sequence of at least *min_length* two-byte units,
    each consisting of one byte in the range 0x20-0x7E followed by a 0x00
    byte. The scan is byte-wise, so runs are found at any byte offset.

    Args:
        data: Bytes to scan.
        min_length: Minimum number of characters (two-byte units) a run
            must contain to be reported. Must be at least 1.

    Returns:
        A list of ``str``, in the order the runs occur in *data*.

    Raises:
        ValueError: If *min_length* is smaller than 1.
    """
    if min_length < 1:
        # ``{0,}`` would match the empty string at almost every offset, and
        # a negative number is not parsed as a repetition count at all.
        raise ValueError("min_length must be at least 1")
    pattern = rb'(?:[\x20-\x7E]\x00){%d,}' % min_length
    # Each match is a whole number of (printable byte, NUL) pairs, which is
    # always valid UTF-16LE, so a strict decode cannot fail.
    return [run.decode('utf-16le') for run in re.findall(pattern, data)]
def extract_strings_from_dll(dll_path, min_length=4):
    """Read a file and return the printable strings embedded in it.

    The file is opened in binary mode and read into memory in one go. It is
    not parsed as a DLL; its raw bytes are simply scanned.

    Args:
        dll_path: Path of the file to read.
        min_length: Minimum number of characters a string must contain to
            be reported; forwarded to both extraction helpers.

    Returns:
        A list of ``str``: all ASCII strings first, followed by all
        UTF-16LE strings, each group in file order.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(dll_path, 'rb') as f:
        data = f.read()
    ascii_strings = extract_ascii_strings(data, min_length=min_length)
    unicode_strings = extract_unicode_strings(data, min_length=min_length)
    return ascii_strings + unicode_strings
def main():
    """Command-line entry point.

    Parses the arguments, extracts the strings from the given file and
    either writes them, newline-separated, to the ``--output`` file or
    prints them one per line to standard output.

    Raises:
        SystemExit: With a short error message if the input file cannot be
            read or the output file cannot be written.
    """
    parser = argparse.ArgumentParser(description="Extract text from a DLL file.")
    parser.add_argument("dll_path", help="Path to the DLL file")
    parser.add_argument(
        "-o", "--output",
        help="Output file to save extracted text",
        default=None,
    )
    args = parser.parse_args()

    try:
        extracted_text = extract_strings_from_dll(args.dll_path)
    except OSError as exc:
        # Report a missing or unreadable input as one line, not a traceback.
        raise SystemExit(f"error: cannot read {args.dll_path}: {exc}")

    if not args.output:
        for line in extracted_text:
            print(line)
        return

    try:
        with open(args.output, "w", encoding="utf-8") as out_file:
            out_file.write("\n".join(extracted_text))
    except OSError as exc:
        raise SystemExit(f"error: cannot write {args.output}: {exc}")
    print(f"Extracted text saved to {args.output}")


if __name__ == "__main__":
    main()