summaryrefslogtreecommitdiff
path: root/lib/python2.7/fileinput.py
diff options
context:
space:
mode:
Diffstat (limited to 'lib/python2.7/fileinput.py')
-rw-r--r--lib/python2.7/fileinput.py413
1 files changed, 413 insertions, 0 deletions
diff --git a/lib/python2.7/fileinput.py b/lib/python2.7/fileinput.py
new file mode 100644
index 0000000..ba48575
--- /dev/null
+++ b/lib/python2.7/fileinput.py
@@ -0,0 +1,413 @@
+"""Helper class to quickly write a loop over all standard input files.
+
+Typical use is:
+
+ import fileinput
+ for line in fileinput.input():
+ process(line)
+
+This iterates over the lines of all files listed in sys.argv[1:],
+defaulting to sys.stdin if the list is empty. If a filename is '-' it
+is also replaced by sys.stdin. To specify an alternative list of
+filenames, pass it as the argument to input(). A single file name is
+also allowed.
+
+Functions filename(), lineno() return the filename and cumulative line
+number of the line that has just been read; filelineno() returns its
+line number in the current file; isfirstline() returns true iff the
+line just read is the first line of its file; isstdin() returns true
+iff the line was read from sys.stdin. Function nextfile() closes the
+current file so that the next iteration will read the first line from
+the next file (if any); lines not read from the file will not count
+towards the cumulative line count; the filename is not changed until
+after the first line of the next file has been read. Function close()
+closes the sequence.
+
+Before any lines have been read, filename() returns None and both line
+numbers are zero; nextfile() has no effect. After all lines have been
+read, filename() and the line number functions return the values
+pertaining to the last line read; nextfile() has no effect.
+
+All files are opened in text mode by default, you can override this by
+setting the mode parameter to input() or FileInput.__init__().
+If an I/O error occurs during opening or reading a file, the IOError
+exception is raised.
+
+If sys.stdin is used more than once, the second and further use will
+return no lines, except perhaps for interactive use, or if it has been
+explicitly reset (e.g. using sys.stdin.seek(0)).
+
+Empty files are opened and immediately closed; the only time their
+presence in the list of filenames is noticeable at all is when the
+last file opened is empty.
+
+It is possible that the last line of a file doesn't end in a newline
+character; otherwise lines are returned including the trailing
+newline.
+
+Class FileInput is the implementation; its methods filename(),
+lineno(), fileline(), isfirstline(), isstdin(), nextfile() and close()
+correspond to the functions in the module. In addition it has a
+readline() method which returns the next input line, and a
+__getitem__() method which implements the sequence behavior. The
+sequence must be accessed in strictly sequential order; sequence
+access and readline() cannot be mixed.
+
+Optional in-place filtering: if the keyword argument inplace=1 is
+passed to input() or to the FileInput constructor, the file is moved
+to a backup file and standard output is directed to the input file.
+This makes it possible to write a filter that rewrites its input file
+in place. If the keyword argument backup=".<some extension>" is also
+given, it specifies the extension for the backup file, and the backup
+file remains around; by default, the extension is ".bak" and it is
+deleted when the output file is closed. In-place filtering is
+disabled when standard input is read. XXX The current implementation
+does not work for MS-DOS 8+3 filesystems.
+
+Performance: this module is unfortunately one of the slower ways of
+processing large numbers of input lines. Nevertheless, a significant
+speed-up has been obtained by using readlines(bufsize) instead of
+readline(). A new keyword argument, bufsize=N, is present on the
+input() function and the FileInput() class to override the default
+buffer size.
+
+XXX Possible additions:
+
+- optional getopt argument processing
+- isatty()
+- read(), read(size), even readlines()
+
+"""
+
+import sys, os
+
+__all__ = ["input","close","nextfile","filename","lineno","filelineno",
+ "isfirstline","isstdin","FileInput"]
+
+_state = None
+
+DEFAULT_BUFSIZE = 8*1024
+
+def input(files=None, inplace=0, backup="", bufsize=0,
+ mode="r", openhook=None):
+ """input([files[, inplace[, backup[, mode[, openhook]]]]])
+
+ Create an instance of the FileInput class. The instance will be used
+ as global state for the functions of this module, and is also returned
+ to use during iteration. The parameters to this function will be passed
+ along to the constructor of the FileInput class.
+ """
+ global _state
+ if _state and _state._file:
+ raise RuntimeError, "input() already active"
+ _state = FileInput(files, inplace, backup, bufsize, mode, openhook)
+ return _state
+
+def close():
+ """Close the sequence."""
+ global _state
+ state = _state
+ _state = None
+ if state:
+ state.close()
+
+def nextfile():
+ """
+ Close the current file so that the next iteration will read the first
+ line from the next file (if any); lines not read from the file will
+ not count towards the cumulative line count. The filename is not
+ changed until after the first line of the next file has been read.
+ Before the first line has been read, this function has no effect;
+ it cannot be used to skip the first file. After the last line of the
+ last file has been read, this function has no effect.
+ """
+ if not _state:
+ raise RuntimeError, "no active input()"
+ return _state.nextfile()
+
+def filename():
+ """
+ Return the name of the file currently being read.
+ Before the first line has been read, returns None.
+ """
+ if not _state:
+ raise RuntimeError, "no active input()"
+ return _state.filename()
+
+def lineno():
+ """
+ Return the cumulative line number of the line that has just been read.
+ Before the first line has been read, returns 0. After the last line
+ of the last file has been read, returns the line number of that line.
+ """
+ if not _state:
+ raise RuntimeError, "no active input()"
+ return _state.lineno()
+
+def filelineno():
+ """
+ Return the line number in the current file. Before the first line
+ has been read, returns 0. After the last line of the last file has
+ been read, returns the line number of that line within the file.
+ """
+ if not _state:
+ raise RuntimeError, "no active input()"
+ return _state.filelineno()
+
+def fileno():
+ """
+ Return the file number of the current file. When no file is currently
+ opened, returns -1.
+ """
+ if not _state:
+ raise RuntimeError, "no active input()"
+ return _state.fileno()
+
+def isfirstline():
+ """
+ Returns true the line just read is the first line of its file,
+ otherwise returns false.
+ """
+ if not _state:
+ raise RuntimeError, "no active input()"
+ return _state.isfirstline()
+
+def isstdin():
+ """
+ Returns true if the last line was read from sys.stdin,
+ otherwise returns false.
+ """
+ if not _state:
+ raise RuntimeError, "no active input()"
+ return _state.isstdin()
+
+class FileInput:
+ """class FileInput([files[, inplace[, backup[, mode[, openhook]]]]])
+
+ Class FileInput is the implementation of the module; its methods
+ filename(), lineno(), fileline(), isfirstline(), isstdin(), fileno(),
+ nextfile() and close() correspond to the functions of the same name
+ in the module.
+ In addition it has a readline() method which returns the next
+ input line, and a __getitem__() method which implements the
+ sequence behavior. The sequence must be accessed in strictly
+ sequential order; random access and readline() cannot be mixed.
+ """
+
+ def __init__(self, files=None, inplace=0, backup="", bufsize=0,
+ mode="r", openhook=None):
+ if isinstance(files, basestring):
+ files = (files,)
+ else:
+ if files is None:
+ files = sys.argv[1:]
+ if not files:
+ files = ('-',)
+ else:
+ files = tuple(files)
+ self._files = files
+ self._inplace = inplace
+ self._backup = backup
+ self._bufsize = bufsize or DEFAULT_BUFSIZE
+ self._savestdout = None
+ self._output = None
+ self._filename = None
+ self._lineno = 0
+ self._filelineno = 0
+ self._file = None
+ self._isstdin = False
+ self._backupfilename = None
+ self._buffer = []
+ self._bufindex = 0
+ # restrict mode argument to reading modes
+ if mode not in ('r', 'rU', 'U', 'rb'):
+ raise ValueError("FileInput opening mode must be one of "
+ "'r', 'rU', 'U' and 'rb'")
+ self._mode = mode
+ if inplace and openhook:
+ raise ValueError("FileInput cannot use an opening hook in inplace mode")
+ elif openhook and not hasattr(openhook, '__call__'):
+ raise ValueError("FileInput openhook must be callable")
+ self._openhook = openhook
+
+ def __del__(self):
+ self.close()
+
+ def close(self):
+ self.nextfile()
+ self._files = ()
+
+ def __iter__(self):
+ return self
+
+ def next(self):
+ try:
+ line = self._buffer[self._bufindex]
+ except IndexError:
+ pass
+ else:
+ self._bufindex += 1
+ self._lineno += 1
+ self._filelineno += 1
+ return line
+ line = self.readline()
+ if not line:
+ raise StopIteration
+ return line
+
+ def __getitem__(self, i):
+ if i != self._lineno:
+ raise RuntimeError, "accessing lines out of order"
+ try:
+ return self.next()
+ except StopIteration:
+ raise IndexError, "end of input reached"
+
+ def nextfile(self):
+ savestdout = self._savestdout
+ self._savestdout = 0
+ if savestdout:
+ sys.stdout = savestdout
+
+ output = self._output
+ self._output = 0
+ if output:
+ output.close()
+
+ file = self._file
+ self._file = 0
+ if file and not self._isstdin:
+ file.close()
+
+ backupfilename = self._backupfilename
+ self._backupfilename = 0
+ if backupfilename and not self._backup:
+ try: os.unlink(backupfilename)
+ except OSError: pass
+
+ self._isstdin = False
+ self._buffer = []
+ self._bufindex = 0
+
+ def readline(self):
+ try:
+ line = self._buffer[self._bufindex]
+ except IndexError:
+ pass
+ else:
+ self._bufindex += 1
+ self._lineno += 1
+ self._filelineno += 1
+ return line
+ if not self._file:
+ if not self._files:
+ return ""
+ self._filename = self._files[0]
+ self._files = self._files[1:]
+ self._filelineno = 0
+ self._file = None
+ self._isstdin = False
+ self._backupfilename = 0
+ if self._filename == '-':
+ self._filename = '<stdin>'
+ self._file = sys.stdin
+ self._isstdin = True
+ else:
+ if self._inplace:
+ self._backupfilename = (
+ self._filename + (self._backup or os.extsep+"bak"))
+ try: os.unlink(self._backupfilename)
+ except os.error: pass
+ # The next few lines may raise IOError
+ os.rename(self._filename, self._backupfilename)
+ self._file = open(self._backupfilename, self._mode)
+ try:
+ perm = os.fstat(self._file.fileno()).st_mode
+ except OSError:
+ self._output = open(self._filename, "w")
+ else:
+ fd = os.open(self._filename,
+ os.O_CREAT | os.O_WRONLY | os.O_TRUNC,
+ perm)
+ self._output = os.fdopen(fd, "w")
+ try:
+ if hasattr(os, 'chmod'):
+ os.chmod(self._filename, perm)
+ except OSError:
+ pass
+ self._savestdout = sys.stdout
+ sys.stdout = self._output
+ else:
+ # This may raise IOError
+ if self._openhook:
+ self._file = self._openhook(self._filename, self._mode)
+ else:
+ self._file = open(self._filename, self._mode)
+ self._buffer = self._file.readlines(self._bufsize)
+ self._bufindex = 0
+ if not self._buffer:
+ self.nextfile()
+ # Recursive call
+ return self.readline()
+
+ def filename(self):
+ return self._filename
+
+ def lineno(self):
+ return self._lineno
+
+ def filelineno(self):
+ return self._filelineno
+
+ def fileno(self):
+ if self._file:
+ try:
+ return self._file.fileno()
+ except ValueError:
+ return -1
+ else:
+ return -1
+
+ def isfirstline(self):
+ return self._filelineno == 1
+
+ def isstdin(self):
+ return self._isstdin
+
+
+def hook_compressed(filename, mode):
+ ext = os.path.splitext(filename)[1]
+ if ext == '.gz':
+ import gzip
+ return gzip.open(filename, mode)
+ elif ext == '.bz2':
+ import bz2
+ return bz2.BZ2File(filename, mode)
+ else:
+ return open(filename, mode)
+
+
+def hook_encoded(encoding):
+ import codecs
+ def openhook(filename, mode):
+ return codecs.open(filename, mode, encoding)
+ return openhook
+
+
+def _test():
+ import getopt
+ inplace = 0
+ backup = 0
+ opts, args = getopt.getopt(sys.argv[1:], "ib:")
+ for o, a in opts:
+ if o == '-i': inplace = 1
+ if o == '-b': backup = a
+ for line in input(args, inplace=inplace, backup=backup):
+ if line[-1:] == '\n': line = line[:-1]
+ if line[-1:] == '\r': line = line[:-1]
+ print "%d: %s[%d]%s %s" % (lineno(), filename(), filelineno(),
+ isfirstline() and "*" or "", line)
+ print "%d: %s[%d]" % (lineno(), filename(), filelineno())
+
+if __name__ == '__main__':
+ _test()