Write good scripts and leave work early! In addition to writing program codes, programmers’ daily work inevitably requires dealing with related testing and verification work.

For example, if you cannot access a website, you need to determine whether the address is accessible, what the server returns, and then determine what the problem is. To complete this task, if you blindly hope to use a compiled language to write such code, the time and energy in practice are not enough, and this is when you need to play the magical role of scripts!

It is no exaggeration to say that whether you can write efficient and practical script codes directly affects a programmer’s happy life [off-get off work time]. Here are 8 practical Python scripts. You can modify them and use them directly when you need them. I recommend you to save them!

1. Solve the problem of garbled characters in unzip under Linux.

import os
import sys
import zipfile
import argparse

s = '\x1b[%d;%dm%s\x1b[0m'       

def unzip(path):

    file = zipfile.ZipFile(path,"r")
    if args.secret:
        file.setpassword(args.secret)

    for name in file.namelist():
        try:
            utf8name=name.decode('gbk')
            pathname = os.path.dirname(utf8name)
        except:
            utf8name=name
            pathname = os.path.dirname(utf8name)

        #print s % (1, 92, '  >> extracting:'), utf8name
        #pathname = os.path.dirname(utf8name)
        if not os.path.exists(pathname) and pathname != "":
            os.makedirs(pathname)
        data = file.read(name)
        if not os.path.exists(utf8name):
            try:
                fo = open(utf8name, "w")
                fo.write(data)
                fo.close
            except:
                pass
    file.close()

def main(argv):
    ######################################################
    # for argparse
    p = argparse.ArgumentParser(description='')
    p.add_argument('xxx', type=str, nargs='*', \
        help='.')
    p.add_argument('-s', '--secret', action='store', \
        default=None, help='')
    global args
    args = p.parse_args(argv[1:])
    xxx = args.xxx

    for path in xxx:
        if path.endswith('.zip'):
            if os.path.exists(path):
                print s % (1, 97, '  ++ unzip:'), path
                unzip(path)
            else:
                print s % (1, 91, '  !! file doesn\'t exist.'), path
        else:
            print s % (1, 91, '  !! file isn\'t a zip file.'), path

if __name__ == '__main__':
    argv = sys.argv
    main(argv)

2. Count the number of code lines in the current root directory.

# coding=utf-8
import os
import time
basedir = './'
filelists = []

whitelist = ['cpp', 'h']

def getFile(basedir):
    global filelists
    for parent,dirnames,filenames in os.walk(basedir):
        for filename in filenames:
            ext = filename.split('.')[-1]
           
            if ext in whitelist:
                filelists.append(os.path.join(parent,filename))

def countLine(fname):
    count = 0

    for file_line in open(fname, 'rb').readlines():
        if file_line != '' and file_line != '\n': 
            count += 1
    print (fname + '----' , count)
    return count
if __name__ == '__main__' :
    startTime = time.clock()
    getFile(basedir)
    totalline = 0
    for filelist in filelists:
        totalline = totalline + countLine(filelist)
    print ('total lines:',totalline)
    print ('Done! Cost Time: %0.2f second' % (time.clock() - startTime))

3. Scan the current directory and all subdirectories and display the sizes.

import os
import sys      
try:
    directory = sys.argv[1]   
except IndexError:
    sys.exit("Must provide an argument.")

dir_size = 0   
fsizedicr = {'Bytes': 1,
             'Kilobytes': float(1) / 1024,
             'Megabytes': float(1) / (1024 * 1024),
             'Gigabytes': float(1) / (1024 * 1024 * 1024)}
for (path, dirs, files) in os.walk(directory):      
    for file in files:                              
        filename = os.path.join(path, file)
        dir_size += os.path.getsize(filename)       

fsizeList = [str(round(fsizedicr[key] * dir_size, 2)) + " " + key for key in fsizedicr] 

if dir_size == 0: print ("File Empty") 
else:
  for units in sorted(fsizeList)[::-1]: 
      print ("Folder Size: " + units)

4. Move all files older than 240 days from the source directory to the target directory.

import shutil
import sys
import time
import os
import argparse

usage = 'python move_files_over_x_days.py -src [SRC] -dst [DST] -days [DAYS]'
description = 'Move files from src to dst if they are older than a certain number of days.  Default is 240 days'

args_parser = argparse.ArgumentParser(usage=usage, description=description)
args_parser.add_argument('-src', '--src', type=str, nargs='?', default='.', help='(OPTIONAL) Directory where files will be moved from. Defaults to current directory')
args_parser.add_argument('-dst', '--dst', type=str, nargs='?', required=True, help='(REQUIRED) Directory where files will be moved to.')
args_parser.add_argument('-days', '--days', type=int, nargs='?', default=240, help='(OPTIONAL) Days value specifies the minimum age of files to be moved. Default is 240.')
args = args_parser.parse_args()

if args.days < 0:
	args.days = 0

src = args.src  
dst = args.dst 
days = args.days
now = time.time()  

if not os.path.exists(dst):
	os.mkdir(dst)

for f in os.listdir(src): 
    if os.stat(f).st_mtime < now - days * 86400:  
        if os.path.isfile(f):  
            shutil.move(f, dst) 

5. Scan the script directory and give the count of different types of scripts.

import os																	
import shutil																
from time import strftime												

logsdir="c:\logs\puttylogs"											
zipdir="c:\logs\puttylogs\zipped_logs"							
zip_program="zip.exe"												

for files in os.listdir(logsdir):										
	if files.endswith(".log"):										
		files1=files+"."+strftime("%Y-%m-%d")+".zip"		
		os.chdir(logsdir) 												
		os.system(zip_program + " " +  files1 +" "+ files)	
		shutil.move(files1, zipdir)									 
		os.remove(files)													

6. Download Leetcode’s algorithm questions.

import sys
import re
import os
import argparse
import requests
from lxml import html as lxml_html

try:
    import html
except ImportError:
    import HTMLParser
    html = HTMLParser.HTMLParser()

try:
    import cPickle as pk
except ImportError:
    import pickle as pk

class LeetcodeProblems(object):
    def get_problems_info(self):
        leetcode_url = 'https://leetcode.com/problemset/algorithms'
        res = requests.get(leetcode_url)
        if not res.ok:
            print('request error')
            sys.exit()
        cm = res.text
        cmt = cm.split('tbody>')[-2]
        indexs = re.findall(r'<td>(\d+)</td>', cmt)
        problem_urls = ['https://leetcode.com' + url \
                        for url in re.findall(
                            r'<a href="(/problems/.+?)"', cmt)]
        levels = re.findall(r"<td value='\d*'>(.+?)</td>", cmt)
        tinfos = zip(indexs, levels, problem_urls)
        assert (len(indexs) == len(problem_urls) == len(levels))
        infos = []
        for info in tinfos:
            res = requests.get(info[-1])
            if not res.ok:
                print('request error')
                sys.exit()
            tree = lxml_html.fromstring(res.text)
            title = tree.xpath('//meta[@property="og:title"]/@content')[0]
            description = tree.xpath('//meta[@property="description"]/@content')
            if not description:
                description = tree.xpath('//meta[@property="og:description"]/@content')[0]
            else:
                description = description[0]
            description = html.unescape(description.strip())
            tags = tree.xpath('//div[@id="tags"]/following::a[@class="btn btn-xs btn-primary"]/text()')
            infos.append(
                {
                    'title': title,
                    'level': info[1],
                    'index': int(info[0]),
                    'description': description,
                    'tags': tags
                }
            )

        with open('leecode_problems.pk', 'wb') as g:
            pk.dump(infos, g)
        return infos

    def to_text(self, pm_infos):
        if self.args.index:
            key = 'index'
        elif self.args.title:
            key = 'title'
        elif self.args.tag:
            key = 'tags'
        elif self.args.level:
            key = 'level'
        else:
            key = 'index'

        infos = sorted(pm_infos, key=lambda i: i[key])

        text_template = '## {index} - {title}\n' \
            '~{level}~  {tags}\n' \
            '{description}\n' + '\n' * self.args.line
        text = ''
        for info in infos:
            if self.args.rm_blank:
                info['description'] = re.sub(r'[\n\r]+', r'\n', info['description'])
            text += text_template.format(**info)

        with open('leecode problems.txt', 'w') as g:
            g.write(text)

    def run(self):
        if os.path.exists('leecode_problems.pk') and not self.args.redownload:
            with open('leecode_problems.pk', 'rb') as f:
                pm_infos = pk.load(f)
        else:
            pm_infos = self.get_problems_info()

        print('find %s problems.' % len(pm_infos))
        self.to_text(pm_infos)

def handle_args(argv):
    p = argparse.ArgumentParser(description='extract all leecode problems to location')
    p.add_argument('--index', action='store_true', help='sort by index')
    p.add_argument('--level', action='store_true', help='sort by level')
    p.add_argument('--tag', action='store_true', help='sort by tag')
    p.add_argument('--title', action='store_true', help='sort by title')
    p.add_argument('--rm_blank', action='store_true', help='remove blank')
    p.add_argument('--line', action='store', type=int, default=10, help='blank of two problems')
    p.add_argument('-r', '--redownload', action='store_true', help='redownload data')
    args = p.parse_args(argv[1:])
    return args

def main(argv):
    args = handle_args(argv)
    x = LeetcodeProblems()
    x.args = args
    x.run()

if __name__ == '__main__':
    argv = sys.argv
    main(argv)

7. Convert Markdown to HTML.

import sys
import os

from bs4 import BeautifulSoup
import markdown

class MarkdownToHtml:

    headTag = '<head><meta charset="utf-8" /></head>'

    def __init__(self,cssFilePath = None):
        if cssFilePath != None:
            self.genStyle(cssFilePath)

    def genStyle(self,cssFilePath):
        with open(cssFilePath,'r') as f:
            cssString = f.read()
        self.headTag = self.headTag[:-7] + '<style type="text/css">{}</style>'.format(cssString) + self.headTag[-7:]

    def markdownToHtml(self, sourceFilePath, destinationDirectory = None, outputFileName = None):
        if not destinationDirectory:

            destinationDirectory = os.path.dirname(os.path.abspath(sourceFilePath))
        if not outputFileName:

            outputFileName = os.path.splitext(os.path.basename(sourceFilePath))[0] + '.html'
        if destinationDirectory[-1] != '/':
            destinationDirectory += '/'
        with open(sourceFilePath,'r', encoding='utf8') as f:
            markdownText = f.read()

        rawHtml = self.headTag + markdown.markdown(markdownText,output_format='html5')

        beautifyHtml = BeautifulSoup(rawHtml,'html5lib').prettify()
        with open(destinationDirectory + outputFileName, 'w', encoding='utf8') as f:
            f.write(beautifyHtml)

if __name__ == "__main__":
    mth = MarkdownToHtml()

    argv = sys.argv[1:]

    outputDirectory = None
    if '-s' in argv:
        cssArgIndex = argv.index('-s') +1
        cssFilePath = argv[cssArgIndex]

        if not os.path.isfile(cssFilePath):
            print('Invalid Path: '+cssFilePath)
            sys.exit()
        mth.genStyle(cssFilePath)

        argv.pop(cssArgIndex)
        argv.pop(cssArgIndex-1)
    if '-o' in argv:
        dirArgIndex = argv.index('-o') +1
        outputDirectory = argv[dirArgIndex]

        if not os.path.isdir(outputDirectory):
            print('Invalid Directory: ' + outputDirectory)
            sys.exit()

        argv.pop(dirArgIndex)
        argv.pop(dirArgIndex-1)

    for filePath in argv:

        if os.path.isfile(filePath):
            mth.markdownToHtml(filePath, outputDirectory)
        else:
            print('Invalid Path: ' + filePath)

8. Text file encoding detection and conversion.

import sys
import os
import argparse
from chardet.universaldetector import UniversalDetector

parser = argparse.ArgumentParser(description = '')
parser.add_argument('filePaths', nargs = '+',
                   help = '')
parser.add_argument('-e', '--encoding', nargs = '?', const = 'UTF-8',
                   help = '''

parser.add_argument('-o', '--output',
                   help = 'dir')

args = parser.parse_args()

if args.output != None:
    if not args.encoding:

        args.encoding = 'UTF-8'

    if not os.path.isdir(args.output):
        print('Invalid Directory: ' + args.output)
        sys.exit()
    else:
        if args.output[-1] != '/':
            args.output += '/'

detector = UniversalDetector()
print()
print('Encoding (Confidence)',':','File path')
for filePath in args.filePaths:

    if not os.path.isfile(filePath):
        print('Invalid Path: ' + filePath)
        continue

    detector.reset()

    for each in open(filePath, 'rb'):

        detector.feed(each)

        if detector.done:
            break

    detector.close()

    charEncoding = detector.result['encoding']
    confidence = detector.result['confidence']

    if charEncoding is None:
        charEncoding = 'Unknown'
        confidence = 0.99
    print('{} {:>12} : {}'.format(charEncoding.rjust(8),
        '('+str(confidence*100)+'%)', filePath))
    if args.encoding and charEncoding != 'Unknown' and confidence > 0.6:

        outputPath = args.output + os.path.basename(filePath) if args.output else filePath
        with open(filePath, 'r', encoding = charEncoding, errors = 'replace') as f:
            temp = f.read()
        with open(outputPath, 'w', encoding = args.encoding, errors = 'replace') as f:
            f.write(temp)

The last two scripts are selected from the course ” Using Python3 to Write a Series of Practical Scripts ” in the laboratory building. The course explains the implementation process of these two scripts in detail. Students who are interested can go directly to the laboratory building to study!

By lzz

Leave a Reply

Your email address will not be published. Required fields are marked *