ODFPY
1.2.0
All
Classes
Namespaces
Files
Functions
Variables
teletype.py
Go to the documentation of this file.
1
# -*- coding: utf-8 -*-
2
#
3
# Create and extract text from ODF, handling whitespace correctly.
4
# Copyright (C) 2008 J. David Eisenberg
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License along
17
# with this program; if not, write to the Free Software Foundation, Inc.,
18
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19
20
21
##
22
#
23
# Class for handling whitespace properly in OpenDocument.
24
#
25
# While it is possible to use getTextContent() and setTextContent()
26
# to extract or create ODF content, these won't extract or create
27
# the appropriate <text:s>, <text:tab>, or <text:line-break>
28
# elements. This module takes care of that problem.
29
#
30
31
from
odf.element
import
Node
32
import
odf.opendocument
33
from
odf.text
import
S,LineBreak,Tab
34
35
##
# Converts a plain string into ODF child nodes, preserving whitespace.
#
# Ordinary characters are collected in textBuffer and flushed to the
# target element as one text node whenever a whitespace element
# (<text:tab>, <text:line-break> or <text:s>) has to be emitted, and
# once more at the end of the input. spaceCount holds the number of
# blanks that still have to be written as a <text:s> element.
#
class WhitespaceText(object):

    def __init__(self):
        # Characters collected since the last flush to the element.
        self.textBuffer = []
        # Blanks (beyond the first of a run) awaiting a <text:s> element.
        self.spaceCount = 0

    ##
    # Process an input string, inserting
    # <text:tab> elements for '\t',
    # <text:line-break> elements for '\n', and
    # <text:s> elements for runs of more than one blank.
    # These will be added to the given element.
    #
    # @param odfElement element receiving the nodes; its addText() and
    #        addElement() methods are called
    # @param s the string to convert
    #
    def addTextToElement(self, odfElement, s):
        # When we encounter a tab or newline, we can immediately
        # dump any accumulated text and then emit the appropriate
        # ODF element.
        #
        # When we encounter a space, we add it to the text buffer,
        # and then collect more spaces. If there are more spaces
        # after the first one, we dump the text buffer and then
        # emit the appropriate <text:s> element.
        length = len(s)
        i = 0
        while i < length:
            ch = s[i]
            i += 1
            if ch == '\t':
                self._emitTextBuffer(odfElement)
                odfElement.addElement(Tab())
            elif ch == '\n':
                self._emitTextBuffer(odfElement)
                odfElement.addElement(LineBreak())
            elif ch == ' ':
                # The first blank of a run stays ordinary text; only the
                # blanks that follow it are counted for <text:s>.
                self.textBuffer.append(' ')
                self.spaceCount = 0
                while i < length and s[i] == ' ':
                    self.spaceCount += 1
                    i += 1
                if self.spaceCount > 0:
                    self._emitTextBuffer(odfElement)
                    self._emitSpaces(odfElement)
            else:
                self.textBuffer.append(ch)

        # Flush whatever text is left after the last character.
        self._emitTextBuffer(odfElement)

    ##
    # Creates a Text Node whose contents are the current textBuffer.
    # Side effect: clears the text buffer.
    #
    def _emitTextBuffer(self, odfElement):
        pending = self.textBuffer
        # Clear the buffer before calling addText() so that an exception
        # raised there cannot leave stale text behind for the next call
        # made on this instance.
        self.textBuffer = []
        if pending:
            odfElement.addText(''.join(pending))

    ##
    # Creates a <text:s> element for the current spaceCount.
    # Side effect: sets spaceCount back to zero
    #
    def _emitSpaces(self, odfElement):
        count = self.spaceCount
        # Reset before emitting, for the same reason as in _emitTextBuffer.
        self.spaceCount = 0
        if count > 0:
            odfElement.addElement(S(c=count))
106
107
##
# Module-level shortcut: add the string s to odfElement, converting
# tabs, newlines and runs of blanks via WhitespaceText.addTextToElement.
# A new WhitespaceText instance is created for every call.
#
def addTextToElement(odfElement, s):
    WhitespaceText().addTextToElement(odfElement, s)
110
111
##
# Return the text content of an element with whitespace markup decoded.
#
# The children of the element are visited in order: text nodes contribute
# their data, <text:line-break> becomes "\n", <text:tab> becomes "\t" and
# <text:s> becomes as many blanks as its 'c' attribute states (one blank
# when the attribute is missing or empty). Any other child element is
# descended into recursively. Nodes that are neither text nor element
# nodes contribute nothing.
#
# @param odfElement element whose childNodes are read
# @return the accumulated text as a single string
#
def extractText(odfElement):
    textNs = u"urn:oasis:names:tc:opendocument:xmlns:text:1.0"
    pieces = []

    for node in odfElement.childNodes:
        kind = node.nodeType
        if kind == Node.TEXT_NODE:
            pieces.append(node.data)
            continue
        if kind != Node.ELEMENT_NODE:
            continue

        qname = node.qname
        if qname == (textNs, u"line-break"):
            pieces.append("\n")
        elif qname == (textNs, u"tab"):
            pieces.append("\t")
        elif qname == (textNs, u"s"):
            count = node.getAttribute('c')
            pieces.append(" " * (int(count) if count else 1))
        else:
            # Any other element: collect the text of its own children.
            pieces.append(extractText(node))

    return ''.join(pieces)
odf.teletype.WhitespaceText.spaceCount
spaceCount
Definition:
teletype.py:39
odf.element
Definition:
element.py:1
odf.teletype.WhitespaceText.addTextToElement
def addTextToElement
Process an input string, inserting
<text:tab> elements for '\t',
<text:line-break> elements for '\n'...
Definition:
teletype.py:48
odf.text.S
def S
Definition:
text.py:388
odf.text.Tab
def Tab
Definition:
text.py:472
odf.teletype.addTextToElement
def addTextToElement
Definition:
teletype.py:107
odf.teletype.WhitespaceText._emitTextBuffer
def _emitTextBuffer
Creates a Text Node whose contents are the current textBuffer.
Definition:
teletype.py:91
odf.opendocument
Definition:
opendocument.py:1
odf.teletype.WhitespaceText.textBuffer
textBuffer
Definition:
teletype.py:38
odf.teletype.WhitespaceText
Definition:
teletype.py:35
odf.teletype.extractText
def extractText
Extract text content from an Element, with whitespace represented properly.
Definition:
teletype.py:118
object
odf.text
Definition:
text.py:1
odf.teletype.WhitespaceText._emitSpaces
def _emitSpaces
Creates a
<text:s> element for the current spaceCount.
Definition:
teletype.py:101
odf.text.LineBreak
def LineBreak
Definition:
text.py:238
odf.teletype.WhitespaceText.__init__
def __init__
Definition:
teletype.py:37
odf
teletype.py
Generated on Tue Oct 28 2014 13:47:24 for ODFPY by
1.8.7