I would recommend not using os.walk(), since you need to do so much to massage its output. Instead, just use a recursive function that uses os.listdir(), os.path.join(), os.path.isdir(), etc.
import os
from xml.sax.saxutils import escape as xml_escape


def DirAsXML(path):
    """Return an XML description of the directory tree rooted at *path*.

    The result is a ``<dir>`` element holding a ``<name>`` element, an
    optional ``<files>`` group with one ``<file><name>..</name></file>``
    entry per regular file, and one nested ``<dir>`` element per
    sub-directory.  Names are XML-escaped.  Entries that are neither a
    regular file nor a directory are skipped.

    Args:
        path: Directory to describe.  A trailing path separator is allowed.

    Returns:
        The XML text as a string, without a trailing newline.

    Raises:
        OSError: If *path* (or a sub-directory) cannot be listed.
    """
    # normpath strips a trailing separator; without it basename('foo/')
    # would be '' and the element would get an empty name.
    name = os.path.basename(os.path.normpath(path))
    lines = ['<dir>', '<name>%s</name>' % xml_escape(name)]

    dirs = []
    files = []
    # os.listdir() returns entries in arbitrary order; sort them so the
    # generated document is reproducible.
    for item in sorted(os.listdir(path)):
        itempath = os.path.join(path, item)
        # NOTE: os.path.isdir() follows symbolic links, so a link that
        # points back up the tree would make this recurse without bound.
        if os.path.isdir(itempath):
            dirs.append(item)
        elif os.path.isfile(itempath):
            files.append(item)

    if files:
        lines.append(' <files>')
        for f in files:
            lines.extend([' <file>',
                          ' <name>%s</name>' % xml_escape(f),
                          ' </file>'])
        lines.append(' </files>')

    for d in dirs:
        sub = DirAsXML(os.path.join(path, d))
        # Indent the child one level.  Collecting lines and joining once at
        # the end keeps every nested </dir> on its own line (previously the
        # next sibling or closing tag was glued onto it).
        lines.extend(' ' + line for line in sub.split('\n'))

    lines.append('</dir>')
    return '\n'.join(lines)


if __name__ == '__main__':
    # Single-argument call form prints the same on Python 2 and Python 3.
    print('<structure>\n' + DirAsXML(os.getcwd()) + '\n</structure>')
Personally, I would recommend a much less complex XML schema by putting names in attributes and getting rid of the <files> group:
import os
from xml.sax.saxutils import quoteattr as xml_quoteattr


def DirAsLessXML(path):
    """Return a compact XML description of the directory tree at *path*.

    Each directory becomes ``<dir name="...">`` and each regular file a
    self-closing ``<file name="..." />`` child; sub-directories are nested
    and indented one level.  Names are quoted with ``quoteattr`` so they
    are safe as attribute values.  Entries that are neither a regular file
    nor a directory are skipped.

    Args:
        path: Directory to describe.  A trailing path separator is allowed.

    Returns:
        The XML text as a string, without a trailing newline.

    Raises:
        OSError: If *path* (or a sub-directory) cannot be listed.
    """
    # normpath strips a trailing separator; without it basename('foo/')
    # would be '' and the element would get an empty name attribute.
    name = os.path.basename(os.path.normpath(path))
    lines = ['<dir name=%s>' % xml_quoteattr(name)]

    # os.listdir() returns entries in arbitrary order; sort them so the
    # generated document is reproducible.
    for item in sorted(os.listdir(path)):
        itempath = os.path.join(path, item)
        # NOTE: os.path.isdir() follows symbolic links, so a link that
        # points back up the tree would make this recurse without bound.
        if os.path.isdir(itempath):
            sub = DirAsLessXML(itempath)
            # Collecting lines and joining once at the end keeps the nested
            # </dir> on its own line (previously the following entry or
            # closing tag was glued onto it).
            lines.extend(' ' + line for line in sub.split('\n'))
        elif os.path.isfile(itempath):
            lines.append(' <file name=%s />' % xml_quoteattr(item))

    lines.append('</dir>')
    return '\n'.join(lines)


if __name__ == '__main__':
    # Single-argument call form prints the same on Python 2 and Python 3.
    print('<structure>\n' + DirAsLessXML(os.getcwd()) + '\n</structure>')
This produces output such as:
<structure>
<dir name="local">
 <dir name=".hg">
  <file name="00changelog.i" />
  <file name="branch" />
  <file name="branch.cache" />
  <file name="dirstate" />
  <file name="hgrc" />
  <file name="requires" />
  <dir name="store">
   <file name="00changelog.i" />
and so on.
If os.walk() worked more like expat, this would be easier for you.
source share