Python:Files

From wiki
Jump to navigation Jump to search


Basics

basename = path.split('/')[-1]
Get the filename from a path
dirname = re.sub(r'/[^/]+/?$', '/', path)  # also works without a trailing '/'; os.path.dirname(path) is the stdlib alternative
Get the directory name from a path
glob.glob(filespec)
Return a list of files matching 'filespec'.

Code example:

import glob
files = glob.glob(filespec)
os.path.isfile(filename)
Boolean for file existence
fh = open (filename,"r")
open filename for read and return the filehandle fh. Use w for write, a for append.
fh.write(line)
Write line to the file opened on filehandle fh.
NOTE: you have to make sure the line-end is in the line too.
fh.close()
Close the file for filehandle fh.
sys.stdout.write(<string>)
Write to standard output

Code example:

import os
if os.path.isfile(filename):
    f1 =  open (filename,"r")
    for line in f1:
        <codeblock>
    f1.close()

Or 'Easier to Ask for Forgiveness than Permission' (EAFP):

try:
    fh = open (filename,"r")
except:
    print('ERROR: {} cannot be opened'.format(filename))
    logging.error('ERROR: {} cannot be opened'.format(filename))
else:
    <other code>
with open (filename,"r") as file
Open filename for read; the file is closed automatically at the end of the with-block.

Code example:

with open (filename,"r") as file:
    for line in file:
        <codeblock>

Other ways of reading

f1.read(size)
Return at most 'size' characters (bytes when the file is opened in binary mode) from the file. If size is omitted or negative the entire file is returned.
f1.readlines()
list(f1)
Return all lines from file as list.
fileinput.input()
Read through all files specified on the commandline.
If there are no files on the commandline read standard input
You can pass other arguments too but you have to remove them from sys.argv before you start reading fileinput
fileinput.filename()
The name of the file we are working on
import fileinput
import sys

otherarg = sys.argv.pop()  # other argument is the last on the commandline

for line in fileinput.input():
    <codeblock>

Filehandling and metadata

os.unlink(filename)
Remove file or symbolic link
shutil.copy(source, destination)
Copy file from source to destination, destination can be both a file or a directory
shutil.copyfile(source, destination)
Copy file from source to destination, both as full path
shutil.move(source, destination)
Move file from source to destination, destination can be both a file or a directory
statinfo = os.stat(filename)
Get file metadata like:
posix.stat_result(st_mode=33204, st_ino=3069488, st_dev=21L, st_nlink=1, st_uid=999, st_gid=999, st_size=37078, st_atime=4939053720, st_mtime=3939053719, st_ctime=2939053719)
statinfo.st_size has the filesize in bytes.
Walking a directory tree and fetching file information
def do_dir(directory):
    """Walk the tree below directory and fetch information on every file.

    Entries whose name starts with a dot are skipped, both files and
    directories. For every other file the path, inode number and ctime
    are fetched; every other directory is walked recursively.

    directory -- path of the directory to start in
    """
    with os.scandir(directory) as it:
        for entry in it:
            if entry.name.startswith('.'):
                continue
            if entry.is_file():
                filepath = entry.path
                inode = entry.inode()
                ctime = entry.stat().st_ctime  # see statinfo for other data
            elif entry.is_dir():
                # Recurse on the path of the subdirectory
                do_dir(entry.path)

Archives

Read an archive

Read a file in a tar archive into a list of lines, regardless of the compression used (not zip).

import tarfile
tar = tarfile.open(<tarfile>,'r')
for member in tar.getmembers():
   print(member.name)
   filelist = tar.extractfile(member)

Copy files from 1 archive to another

#!/usr/bin/env python3
import tarfile

filenames = {<(part of) filename to copy>, <(part of) filename to copy>}
# Copy every member of tar1.tar whose name contains one of the (partial)
# names in 'filenames' to tar2.tar. The with-blocks close both archives,
# also when an unexpected error occurs.
with tarfile.open('tar1.tar', "r") as oldtar:
    with tarfile.open('tar2.tar', "w") as newtar:
        for member in oldtar.getmembers():
            # Test all patterns at once so a member that matches more than
            # one pattern is added only once.
            if not any(filename in member.name for filename in filenames):
                print(f"{member.name} Skipped")
                continue
            try:
                # Pass the TarInfo itself: a lookup by name returns the last
                # member with that name, which may be a different one.
                newtar.addfile(member, oldtar.extractfile(member))
            except OSError as exception:
                print(f"{member.name} has error {exception}")
            else:
                print(f"{member.name} Added")

Zip files

Check this page.

Read a zip-file

import zipfile

z = zipfile.ZipFile(zipile)
for file in z.namelist():
    print(file)

data = z.read(<zipped-filename>)

Create a zip-file

import zipfile,zlib

zipname = filename + '.zip'
# The archive must be closed to get a valid zip-file; the with-block takes
# care of that, also when writing fails.
with zipfile.ZipFile(zipname, mode='w') as zfile:
    zfile.write(filename, compress_type=zipfile.ZIP_DEFLATED)


Excel files

Reading

An Excel file is basically a zip-file with some specific content and can be handled as such. Pandas (see Pandas#Reading_Data) has a built-in ability to read Excel files into a dataframe. Whenever possible use that, else check XLS.

Writing

Below writes a list of lists to excel

import xlsxwriter
import json

def main():
    """Write the list of lists 'alist' to a worksheet in a new Excel file.

    excelfilename, worksheetname and alist are not defined here; they must
    exist at module level before main() is called.
    """
    header = ['Column1', 'Column2']

    # Used as a context manager the workbook is closed (and so written to
    # disk) at the end of the block, also when writing the sheet fails.
    with xlsxwriter.Workbook(excelfilename, {'nan_inf_to_errors': True}) as workbook:
        writeworkbook(workbook, worksheetname, alist, header)


def writeworkbook(workbook, worksheetname, outlist, header):
    """Add a worksheet to workbook and fill it with header and rows.

    workbook      -- object with an add_worksheet(name) method that returns
                     a worksheet with set_column() and write_row() methods
    worksheetname -- name of the worksheet to add
    outlist       -- list of rows, each row a list or tuple of cell values;
                     rows may have different lengths
    header        -- list of strings written as the first row

    Each column gets the width of its longest value (header included) plus
    10%. Cells that hold a list, tuple or dict are written as their str()
    representation. The rows in outlist are left untouched, so tuples can be
    used as rows too.
    """
    worksheet = workbook.add_worksheet(worksheetname)

    # Longest value per column, starting with the header
    columnwidths = {columnno: len(column) for columnno, column in enumerate(header)}
    for row in outlist:
        for columnno, column in enumerate(row):
            # The rows may have variable length, so a column may be new here
            columnwidths[columnno] = max(columnwidths.get(columnno, 0), len(str(column)))

    for columnno, width in columnwidths.items():
        worksheet.set_column(columnno, columnno, width + width * 0.1)

    worksheet.write_row(0, 0, header)
    for wsindex, row in enumerate(outlist, start=1):
        # Build a new list instead of changing the row of the caller
        cells = [str(column) if isinstance(column, (list, tuple, dict)) else column
                 for column in row]
        worksheet.write_row(wsindex, 0, cells)


main()

Read from standard input and keyboard

Read from standard input

import sys

for line in sys.stdin:
    <codeblock>

Prompt and read from keyboard into a

a = input("Prompt: ")

In python2

a = raw_input("Prompt: ")

Read a csv

This code reads all files matching the specification and returns the content as a list of dicts that have the fieldnames as keys. Fieldnames must be on the first line of the file and must be unique. NOTE: This code cannot handle values that contain the separator. The line will be split on all separator occurrences. Use Pandas or a specific csv-reader module if you need this.

def csv2dict(filespec, separator=','):
    '''Convert all csv-files matching filespec to one list of dicts.

    filespec  -- glob pattern of the files to read
    separator -- string the fields are separated by

    The first line of each file holds the fieldnames, they are the keys of
    the dicts. Every line is stripped and split on all occurrences of
    separator, so values cannot contain the separator.

    Files that cannot be opened, are empty or have fieldnames that are not
    unique are reported and skipped. A line with too few fields is reported
    but still added with the fields it has; fields beyond the number of
    fieldnames are dropped.

    Returns a list with a dict for each data line of all files.
    '''
    outfile = []
    for filename in glob.glob(filespec):
        try:
            fh = open(filename, "r")
        except OSError:
            print('{} cannot be opened'.format(filename))
            continue
        with fh:
            filelist = [line.strip().split(separator) for line in fh]

        if not filelist:
            # No header line at all, nothing to convert
            print('ERROR: {} is empty'.format(filename))
            continue

        header = filelist[0]
        if len(header) != len(set(header)):
            print('ERROR: Fieldnames in {} are not unique'.format(filename))
            continue

        numfields = len(header)
        for linecount, line in enumerate(filelist[1:], start=1):
            if len(line) < numfields:
                print('ERROR: invalid number of fields in line ' + str(linecount))
            # zip stops at the shortest of the two, so extra fields are dropped
            outfile.append(dict(zip(header, line)))
    return outfile

Read xml

Module and code examples Python:XML