ODFPY  1.2.0
 All Classes Namespaces Files Functions Variables
teletype.py
Go to the documentation of this file.
1 # -*- coding: utf-8 -*-
2 #
3 # Create and extract text from ODF, handling whitespace correctly.
4 # Copyright (C) 2008 J. David Eisenberg
5 #
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
10 #
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
15 #
16 # You should have received a copy of the GNU General Public License along
17 # with this program; if not, write to the Free Software Foundation, Inc.,
18 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 
20 
21 ##
22 #
23 # Class for handling whitespace properly in OpenDocument.
24 #
25 # While it is possible to use getTextContent() and setTextContent()
26 # to extract or create ODF content, these won't extract or create
27 # the appropriate <text:s>, <text:tab>, or <text:line-break>
28 # elements. This module takes care of that problem.
29 #
30 
31 from odf.element import Node
32 import odf.opendocument
33 from odf.text import S,LineBreak,Tab
34 
36 
##
# Converts plain text into ODF content while preserving whitespace.
#
# Tabs, newlines and runs of blanks are turned into Tab(), LineBreak()
# and S(c=...) elements; everything else is added as ordinary text.
# Target elements must provide addText() and addElement().
#
class WhitespaceText(object):

    def __init__(self):
        # Characters collected since the last flush to the target element.
        self.textBuffer = []
        # Number of blanks seen *after* the first one in the current run.
        self.spaceCount = 0

    ##
    # Process an input string, inserting
    # <text:tab> elements for '\t',
    # <text:line-break> elements for '\n', and
    # <text:s> elements for runs of more than one blank.
    # These will be added to the given element.
    #
    # @param odfElement element receiving the text and whitespace nodes
    # @param s          the string to convert
    #
    def addTextToElement(self, odfElement, s):
        i = 0
        length = len(s)

        # When we encounter a tab or newline, we can immediately
        # dump any accumulated text and then emit the appropriate
        # ODF element.
        #
        # When we encounter a space, we add it to the text buffer,
        # and then collect more spaces. If there are more spaces
        # after the first one, we dump the text buffer and
        # then emit the appropriate <text:s> element.

        while i < length:
            ch = s[i]
            if ch == '\t':
                self._emitTextBuffer(odfElement)
                odfElement.addElement(Tab())
                i += 1
            elif ch == '\n':
                self._emitTextBuffer(odfElement)
                odfElement.addElement(LineBreak())
                i += 1
            elif ch == ' ':
                # The first blank of a run is kept as literal text ...
                self.textBuffer.append(' ')
                i += 1
                # ... and only the blanks that follow it are counted.
                self.spaceCount = 0
                while i < length and s[i] == ' ':
                    self.spaceCount += 1
                    i += 1
                if self.spaceCount > 0:
                    self._emitTextBuffer(odfElement)
                    self._emitSpaces(odfElement)
            else:
                self.textBuffer.append(ch)
                i += 1

        # Flush whatever text is left after the last special character.
        self._emitTextBuffer(odfElement)

    ##
    # Creates a Text Node whose contents are the current textBuffer.
    # Does nothing when the buffer is empty.
    # Side effect: clears the text buffer.
    #
    def _emitTextBuffer(self, odfElement):
        if self.textBuffer:
            odfElement.addText(''.join(self.textBuffer))
            self.textBuffer = []

    ##
    # Creates a <text:s> element for the current spaceCount.
    # Does nothing when spaceCount is zero.
    # Side effect: sets spaceCount back to zero
    #
    def _emitSpaces(self, odfElement):
        if self.spaceCount > 0:
            spaceElement = S(c=self.spaceCount)
            odfElement.addElement(spaceElement)
            self.spaceCount = 0
106 
def addTextToElement(odfElement, s):
    # Module-level convenience wrapper: uses a fresh WhitespaceText
    # instance to add the string s to odfElement, so callers do not
    # have to create the helper object themselves.
    wst = WhitespaceText()
    wst.addTextToElement(odfElement, s)
110 
111 ##
112 # Extract text content from an Element, with whitespace represented
113 # properly. Returns the text, with tabs, spaces, and newlines
114 # correctly evaluated. This method recursively descends through the
115 # children of the given element, accumulating text and "unwrapping"
116 # <text:s>, <text:tab>, and <text:line-break> elements along the way.
117 #
def extractText(odfElement):
    # Returns the text content of odfElement as one string.
    # Text nodes contribute their data; <text:line-break>, <text:tab>
    # and <text:s> children become "\n", "\t" and blanks; any other
    # child element is processed recursively.
    textns = u"urn:oasis:names:tc:opendocument:xmlns:text:1.0"
    result = []

    for child in odfElement.childNodes:
        if child.nodeType == Node.TEXT_NODE:
            result.append(child.data)
        elif child.nodeType == Node.ELEMENT_NODE:
            tagName = child.qname
            if tagName == (textns, u"line-break"):
                result.append("\n")
            elif tagName == (textns, u"tab"):
                result.append("\t")
            elif tagName == (textns, u"s"):
                # A missing or empty 'c' attribute means a single blank.
                c = child.getAttribute('c')
                spaceCount = int(c) if c else 1
                result.append(" " * spaceCount)
            else:
                result.append(extractText(child))
    return ''.join(result)
def addTextToElement
Process an input string, inserting <text:tab> elements for '\t', <text:line-break> elements for '\n', and <text:s> elements for runs of more than one blank.
Definition: teletype.py:48
def S
Definition: text.py:388
def Tab
Definition: text.py:472
def addTextToElement
Definition: teletype.py:107
def _emitTextBuffer
Creates a Text Node whose contents are the current textBuffer.
Definition: teletype.py:91
def extractText
Extract text content from an Element, with whitespace represented properly.
Definition: teletype.py:118
Definition: text.py:1
def _emitSpaces
Creates a <text:s> element for the current spaceCount.
Definition: teletype.py:101
def LineBreak
Definition: text.py:238