Belle II Software development
bitstring.py
1#!/usr/bin/env python
2
3r"""
4This package defines classes that simplify bit-wise creation, manipulation and
5interpretation of data.
6
7Classes:
8
9Bits -- An immutable container for binary data.
10BitArray -- A mutable container for binary data.
11ConstBitStream -- An immutable container with streaming methods.
12BitStream -- A mutable container with streaming methods.
13
                   Bits (base class)
                  /    \
 + mutating methods /      \ + streaming methods
                /        \
           BitArray   ConstBitStream
                \        /
                 \      /
                  \    /
                 BitStream
23
24Functions:
25
26pack -- Create a BitStream from a format string.
27
28Exceptions:
29
30Error -- Module exception base class.
31CreationError -- Error during creation.
32InterpretError -- Inappropriate interpretation of binary data.
33ByteAlignError -- Whole byte position or length needed.
34ReadError -- Reading or peeking past the end of a bitstring.
35
36https://github.com/scott-griffiths/bitstring
37"""
38
39__licence__ = """
40The MIT License
41
42Copyright (c) 2006-2016 Scott Griffiths (dr.scottgriffiths@gmail.com)
43
44Permission is hereby granted, free of charge, to any person obtaining a copy
45of this software and associated documentation files (the "Software"), to deal
46in the Software without restriction, including without limitation the rights
47to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
48copies of the Software, and to permit persons to whom the Software is
49furnished to do so, subject to the following conditions:
50
51The above copyright notice and this permission notice shall be included in
52all copies or substantial portions of the Software.
53
54THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
55IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
56FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
57AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
58LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
59OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
60THE SOFTWARE.
61"""
62
63__version__ = "3.1.5"
64
65__author__ = "Scott Griffiths"
66
67import numbers
68import copy
69import sys
70import re
71import binascii
72import mmap
73import os
74import struct
75import operator
76import collections
77import array
78
# Byte order of the host platform, taken directly from sys.byteorder.
byteorder = sys.byteorder

# Module-wide switch; False by default.
bytealigned = False
"""Determines whether a number of methods default to working only on byte boundaries."""

# Maximum number of digits to use in __str__ and __repr__.
MAX_CHARS = 250

# Maximum size of caches used for speed optimisations.
CACHE_SIZE = 1000
89
90
class Error(Exception):
    """Root of the bitstring exception hierarchy.

    The first positional argument is the message; any further arguments are
    kept and substituted into the message with str.format() when the
    exception is rendered as a string.
    """

    def __init__(self, *params):
        if params:
            self.msg, self.params = params[0], params[1:]
        else:
            self.msg, self.params = '', ()

    def __str__(self):
        # Only run format() when there is something to substitute, so that a
        # plain message containing braces is returned untouched.
        if not self.params:
            return self.msg
        return self.msg.format(*self.params)
102
103
class ReadError(Error, IndexError):
    """Raised when a read or peek goes past the end of a bitstring.

    Also an IndexError, so generic index handling catches it too.
    """

    def __init__(self, *params):
        # Error is next in the MRO, so this resolves to Error.__init__.
        super(ReadError, self).__init__(*params)
109
110
class InterpretError(Error, ValueError):
    """Raised when binary data is interpreted in an inappropriate way.

    Also a ValueError, so generic value handling catches it too.
    """

    def __init__(self, *params):
        # Error is next in the MRO, so this resolves to Error.__init__.
        super(InterpretError, self).__init__(*params)
116
117
# NOTE(review): the class statement was missing from the reviewed copy of this
# file; the name is taken from the module docstring's exception list and the
# single Error base is an assumption - confirm against the canonical source.
class ByteAlignError(Error):
    """Whole-byte position or length needed."""

    def __init__(self, *params):
        Error.__init__(self, *params)
123
124
class CreationError(Error, ValueError):
    """Raised when a bitstring is created with an inappropriate argument.

    Also a ValueError, so generic value handling catches it too.
    """

    def __init__(self, *params):
        # Error is next in the MRO, so this resolves to Error.__init__.
        super(CreationError, self).__init__(*params)
130
131
class ConstByteStore(object):
    """Stores raw bytes together with a bit offset and length.

    The stored bits are the `bitlength` bits that start `offset` bits into
    `_rawarray`; bit 0 of a byte is its most significant bit.

    Used internally - not part of public interface.
    """

    __slots__ = ('offset', '_rawarray', 'bitlength')

    def __init__(self, data, bitlength=None, offset=None):
        """data is either a bytearray or a MmapByteArray

        offset defaults to 0 and bitlength defaults to every bit of data
        after the offset.
        """
        self._rawarray = data
        if offset is None:
            offset = 0
        if bitlength is None:
            bitlength = 8 * len(data) - offset
        self.offset = offset
        self.bitlength = bitlength

    def getbit(self, pos):
        """Return the bit at position pos (relative to the offset) as a bool."""
        assert 0 <= pos < self.bitlength
        byte, bit = divmod(self.offset + pos, 8)
        # 128 >> bit selects the bit counting from the most significant end.
        return bool(self._rawarray[byte] & (128 >> bit))

    def getbyte(self, pos):
        """Direct access to byte data."""
        return self._rawarray[pos]

    def getbyteslice(self, start, end):
        """Direct access to byte data."""
        c = self._rawarray[start:end]
        return c

    @property
    def bytelength(self):
        """Number of bytes touched by the stored bits (0 when empty)."""
        if not self.bitlength:
            return 0
        sb = self.offset // 8
        eb = (self.offset + self.bitlength - 1) // 8
        return eb - sb + 1

    def __copy__(self):
        """Return a ByteStore built from a slice-copy of the raw data."""
        return ByteStore(self._rawarray[:], self.bitlength, self.offset)

    def _appendstore(self, store):
        """Join another store on to the end of this one."""
        if not store.bitlength:
            return
        # Set new array offset to the number of bits in the final byte of current array.
        store = offsetcopy(store, (self.offset + self.bitlength) % 8)
        if store.offset:
            # first do the byte with the join.
            # High bits come from our last byte, low bits from store's first byte.
            joinval = (self._rawarray.pop() & (255 ^ (255 >> store.offset)) |
                       (store.getbyte(0) & (255 >> store.offset)))
            self._rawarray.append(joinval)
            self._rawarray.extend(store._rawarray[1:])
        else:
            self._rawarray.extend(store._rawarray)
        self.bitlength += store.bitlength

    def _prependstore(self, store):
        """Join another store on to the start of this one."""
        if not store.bitlength:
            return
        # Set the offset of copy of store so that its final byte
        # ends in a position that matches the offset of self,
        # then join self on to the end of it.
        store = offsetcopy(store, (self.offset - store.bitlength) % 8)
        assert (store.offset + store.bitlength) % 8 == self.offset % 8
        bit_offset = self.offset % 8
        if bit_offset:
            # first do the byte with the join.
            # High bits come from store's last byte, low bits from our first byte.
            store.setbyte(-1, (store.getbyte(-1) & (255 ^ (255 >> bit_offset)) |
                               (self._rawarray[self.byteoffset] & (255 >> bit_offset))))
            store._rawarray.extend(self._rawarray[self.byteoffset + 1: self.byteoffset + self.bytelength])
        else:
            store._rawarray.extend(self._rawarray[self.byteoffset: self.byteoffset + self.bytelength])
        # Adopt the combined array and the copy's offset as our own.
        self._rawarray = store._rawarray
        self.offset = store.offset
        self.bitlength += store.bitlength

    @property
    def byteoffset(self):
        """Index of the byte that contains the first stored bit."""
        return self.offset // 8

    @property
    def rawbytes(self):
        """The underlying byte container, returned without copying."""
        return self._rawarray
219
220
# NOTE(review): the class statement was missing from the reviewed copy of this
# file; the name and base follow the docstring and the ByteStore(...) calls
# elsewhere in the module - confirm against the canonical source.
class ByteStore(ConstByteStore):
    """Adding mutating methods to ConstByteStore

    Bit positions are relative to the store's offset; byte positions index
    the raw array directly.

    Used internally - not part of public interface.
    """
    __slots__ = ()

    def setbit(self, pos):
        """Set the bit at position pos to 1."""
        assert 0 <= pos < self.bitlength
        byte, bit = divmod(self.offset + pos, 8)
        self._rawarray[byte] |= (128 >> bit)

    def unsetbit(self, pos):
        """Clear the bit at position pos to 0."""
        assert 0 <= pos < self.bitlength
        byte, bit = divmod(self.offset + pos, 8)
        self._rawarray[byte] &= ~(128 >> bit)

    def invertbit(self, pos):
        """Flip the bit at position pos."""
        assert 0 <= pos < self.bitlength
        byte, bit = divmod(self.offset + pos, 8)
        self._rawarray[byte] ^= (128 >> bit)

    def setbyte(self, pos, value):
        """Overwrite the raw byte at index pos."""
        self._rawarray[pos] = value

    def setbyteslice(self, start, end, value):
        """Overwrite the raw bytes in [start:end] with value."""
        self._rawarray[start:end] = value
248
249
def offsetcopy(s, newoffset):
    """Return a copy of a ByteStore with the newoffset.

    s -- the store to copy.
    newoffset -- bit offset (0 to 7) that the copy's data should start at.

    An empty store is returned as a plain copy.copy() and keeps its own
    offset. Otherwise the result is a new ByteStore holding the same bits,
    re-packed so that the first bit sits newoffset bits into the first byte.

    Not part of public interface.
    """
    assert 0 <= newoffset < 8
    if not s.bitlength:
        return copy.copy(s)
    bit_offset = s.offset % 8
    first = s.byteoffset
    last = first + s.bytelength - 1
    if newoffset == bit_offset:
        # Already aligned as requested: just slice out the bytes in use.
        return ByteStore(s.getbyteslice(first, last + 1), s.bitlength, newoffset)
    d = s._rawarray
    # Number of meaningful bits in the final source byte (1 to 8).
    bits_in_last_byte = (s.offset + s.bitlength) % 8 or 8
    newdata = []
    if newoffset < bit_offset:
        # We need to shift everything left
        shiftleft = bit_offset - newoffset
        # First deal with everything except for the final byte
        for x in range(first, last):
            newdata.append(((d[x] << shiftleft) & 0xff) +
                           (d[x + 1] >> (8 - shiftleft)))
        # The final byte only survives if some of its bits were not already
        # shifted into the previous output byte.
        if bits_in_last_byte > shiftleft:
            newdata.append((d[last] << shiftleft) & 0xff)
    else:
        shiftright = newoffset - bit_offset
        # Start from the first byte actually in use. Reading raw byte 0 here
        # would be wrong whenever the store's offset is 8 bits or more.
        newdata.append(d[first] >> shiftright)
        for x in range(first + 1, last + 1):
            newdata.append(((d[x - 1] << (8 - shiftright)) & 0xff) +
                           (d[x] >> shiftright))
        # Shifting right may spill the tail of the last byte into a new one.
        if bits_in_last_byte + shiftright > 8:
            newdata.append((d[last] << (8 - shiftright)) & 0xff)
    new_s = ByteStore(bytearray(newdata), s.bitlength, newoffset)
    assert new_s.offset == newoffset
    return new_s
290
291
def equal(a, b):
    """Return True if ByteStores a == b.

    Two stores are equal when they have the same bit length and the same
    bit values, regardless of their offsets. The comparison works on the
    raw bytes, masking and shifting the partial first and last bytes.

    Not part of public interface.
    """
    # We want to return False for inequality as soon as possible, which
    # means we get lots of special cases.
    # First the easy one - compare lengths:
    a_bitlength = a.bitlength
    b_bitlength = b.bitlength
    if a_bitlength != b_bitlength:
        return False
    if not a_bitlength:
        assert b_bitlength == 0
        return True
    # Make 'a' the one with the smaller offset
    if (a.offset % 8) > (b.offset % 8):
        a, b = b, a
    # and create some aliases
    a_bitoff = a.offset % 8
    b_bitoff = b.offset % 8
    a_byteoffset = a.byteoffset
    b_byteoffset = b.byteoffset
    a_bytelength = a.bytelength
    b_bytelength = b.bytelength
    da = a._rawarray
    db = b._rawarray

    # If they are pointing to the same data, they must be equal
    if da is db and a.offset == b.offset:
        return True

    if a_bitoff == b_bitoff:
        # Same alignment: bytes can be compared directly once the unused
        # leading and trailing bits are masked off.
        bits_spare_in_last_byte = 8 - (a_bitoff + a_bitlength) % 8
        if bits_spare_in_last_byte == 8:
            bits_spare_in_last_byte = 0
        # Special case for a, b contained in a single byte
        if a_bytelength == 1:
            a_val = ((da[a_byteoffset] << a_bitoff) & 0xff) >> (8 - a_bitlength)
            b_val = ((db[b_byteoffset] << b_bitoff) & 0xff) >> (8 - b_bitlength)
            return a_val == b_val
        # Otherwise check first byte
        if da[a_byteoffset] & (0xff >> a_bitoff) != db[b_byteoffset] & (0xff >> b_bitoff):
            return False
        # then everything up to the last
        b_a_offset = b_byteoffset - a_byteoffset
        for x in range(1 + a_byteoffset, a_byteoffset + a_bytelength - 1):
            if da[x] != db[b_a_offset + x]:
                return False
        # and finally the last byte
        return (da[a_byteoffset + a_bytelength - 1] >> bits_spare_in_last_byte ==
                db[b_byteoffset + b_bytelength - 1] >> bits_spare_in_last_byte)

    assert a_bitoff != b_bitoff
    # This is how much we need to shift a to the right to compare with b:
    shift = b_bitoff - a_bitoff
    # Special case for b only one byte long
    if b_bytelength == 1:
        assert a_bytelength == 1
        a_val = ((da[a_byteoffset] << a_bitoff) & 0xff) >> (8 - a_bitlength)
        b_val = ((db[b_byteoffset] << b_bitoff) & 0xff) >> (8 - b_bitlength)
        return a_val == b_val
    # Special case for a only one byte long
    if a_bytelength == 1:
        assert b_bytelength == 2
        a_val = ((da[a_byteoffset] << a_bitoff) & 0xff) >> (8 - a_bitlength)
        # b's bits straddle two bytes: join them into a 16-bit value first.
        b_val = ((db[b_byteoffset] << 8) + db[b_byteoffset + 1]) << b_bitoff
        b_val &= 0xffff
        b_val >>= 16 - b_bitlength
        return a_val == b_val

    # Compare first byte of b with bits from first byte of a
    if (da[a_byteoffset] & (0xff >> a_bitoff)) >> shift != db[b_byteoffset] & (0xff >> b_bitoff):
        return False
    # Now compare every full byte of b with bits from 2 bytes of a
    for x in range(1, b_bytelength - 1):
        # Construct byte from 2 bytes in a to compare to byte in b
        b_val = db[b_byteoffset + x]
        a_val = ((da[a_byteoffset + x - 1] << 8) + da[a_byteoffset + x]) >> shift
        a_val &= 0xff
        if a_val != b_val:
            return False

    # Now check bits in final byte of b
    final_b_bits = (b.offset + b_bitlength) % 8
    if not final_b_bits:
        final_b_bits = 8
    b_val = db[b_byteoffset + b_bytelength - 1] >> (8 - final_b_bits)
    final_a_bits = (a.offset + a_bitlength) % 8
    if not final_a_bits:
        final_a_bits = 8
    if b.bytelength > a_bytelength:
        # b spills into one more byte than a, so its final bits all come
        # from the last byte of a.
        assert b_bytelength == a_bytelength + 1
        a_val = da[a_byteoffset + a_bytelength - 1] >> (8 - final_a_bits)
        a_val &= 0xff >> (8 - final_b_bits)
        return a_val == b_val
    assert a_bytelength == b_bytelength
    # Same byte count: b's final bits are spread over the last two bytes of a.
    a_val = da[a_byteoffset + a_bytelength - 2] << 8
    a_val += da[a_byteoffset + a_bytelength - 1]
    a_val >>= (8 - final_a_bits)
    a_val &= 0xff >> (8 - final_b_bits)
    return a_val == b_val
394
395
class MmapByteArray(object):
    """Read-only, bytearray-like window on to a memory-mapped file.

    The window covers `bytelength` bytes starting `byteoffset` bytes into
    the file; all indices passed to [] are relative to the window.

    Not part of public interface.
    """

    __slots__ = ('filemap', 'filelength', 'source', 'byteoffset', 'bytelength')

    def __init__(self, source, bytelength=None, byteoffset=None):
        """Map the whole of the open file `source` for reading.

        byteoffset defaults to 0 and bytelength to the rest of the file.
        """
        self.source = source
        # Seek to the end to find out how long the file is.
        source.seek(0, os.SEEK_END)
        self.filelength = source.tell()
        self.byteoffset = 0 if byteoffset is None else byteoffset
        if bytelength is None:
            self.bytelength = self.filelength - self.byteoffset
        else:
            self.bytelength = bytelength
        self.filemap = mmap.mmap(source.fileno(), 0, access=mmap.ACCESS_READ)

    def __getitem__(self, key):
        """Return one byte as an int, or a slice of bytes as a bytearray."""
        try:
            first, last = key.start, key.stop
        except AttributeError:
            # Not a slice, so treat key as a single index.
            try:
                assert 0 <= key < self.bytelength
                # Python 2 maps yield one-character strings.
                return ord(self.filemap[key + self.byteoffset])
            except TypeError:
                # for Python 3
                return self.filemap[key + self.byteoffset]
        if first is None:
            first = 0
        if last is None:
            last = self.bytelength
        assert key.step is None
        assert 0 <= first < self.bytelength
        assert 0 <= last <= self.bytelength
        window = slice(first + self.byteoffset, last + self.byteoffset)
        return bytearray(self.filemap[window])

    def __len__(self):
        """Return the length of the window in bytes."""
        return self.bytelength
440
441
# This creates a dictionary for every possible byte with the value being
# the key with its bits reversed.
BYTE_REVERSAL_DICT = dict()

# For Python 2.x/ 3.x coexistence
# Yes this is very very hacky.
# The bare `xrange` lookup raises NameError on Python 3, which selects the
# branch: the values are one-character strings on Python 2 and one-byte
# bytes objects on Python 3.
try:
    xrange
    for i in range(256):
        BYTE_REVERSAL_DICT[i] = chr(int("{0:08b}".format(i)[::-1], 2))
except NameError:
    for i in range(256):
        BYTE_REVERSAL_DICT[i] = bytes([int("{0:08b}".format(i)[::-1], 2)])
    # Provide the Python 2 names used elsewhere in this module.
    from io import IOBase as file
    xrange = range
    basestring = str

# Python 2.x octals start with '0', in Python 3 it's '0o'
LEADING_OCT_CHARS = len(oct(1)) - 1
461
462
# NOTE(review): the def line was missing from the reviewed copy of this file;
# the name tidy_input_string is taken from upstream bitstring - confirm
# against the canonical source before merging.
def tidy_input_string(s):
    """Return string made lowercase and with all whitespace removed."""
    # split() with no argument breaks on any run of whitespace, so joining
    # the pieces back together drops spaces, tabs and newlines alike.
    s = ''.join(s.split()).lower()
    return s
467
468
# Names accepted in front of the optional ':length' and '=value' parts of a
# format token.
INIT_NAMES = ('uint', 'int', 'ue', 'se', 'sie', 'uie', 'hex', 'oct', 'bin', 'bits',
              'uintbe', 'intbe', 'uintle', 'intle', 'uintne', 'intne',
              'float', 'floatbe', 'floatle', 'floatne', 'bytes', 'bool', 'pad')

# A token of the form name[:len][=value], matched case-insensitively.
TOKEN_RE = re.compile(r'(?P<name>' + '|'.join(INIT_NAMES) +
                      r')((:(?P<len>[^=]+)))?(=(?P<value>.*))?$', re.IGNORECASE)
# A token with no name: just [len][=value]. The parser treats it as 'uint'.
DEFAULT_UINT = re.compile(r'(?P<len>[^=]+)?(=(?P<value>.*))?$', re.IGNORECASE)

# A token with a leading multiplier, factor*token.
MULTIPLICATIVE_RE = re.compile(r'(?P<factor>.*)\*(?P<token>.+)')

# Hex, oct or binary literals
LITERAL_RE = re.compile(r'(?P<name>0(x|o|b))(?P<value>.+)', re.IGNORECASE)

# An endianness indicator followed by one or more struct.pack codes
STRUCT_PACK_RE = re.compile(r'(?P<endian><|>|@)?(?P<fmt>(?:\d*[bBhHlLqQfd])+)$')

# A number followed by a single character struct.pack code
STRUCT_SPLIT_RE = re.compile(r'\d*[bBhHlLqQfd]')

# These replicate the struct.pack codes
# Big-endian
REPLACEMENTS_BE = {'b': 'intbe:8', 'B': 'uintbe:8',
                   'h': 'intbe:16', 'H': 'uintbe:16',
                   'l': 'intbe:32', 'L': 'uintbe:32',
                   'q': 'intbe:64', 'Q': 'uintbe:64',
                   'f': 'floatbe:32', 'd': 'floatbe:64'}
# Little-endian
REPLACEMENTS_LE = {'b': 'intle:8', 'B': 'uintle:8',
                   'h': 'intle:16', 'H': 'uintle:16',
                   'l': 'intle:32', 'L': 'uintle:32',
                   'q': 'intle:64', 'Q': 'uintle:64',
                   'f': 'floatle:32', 'd': 'floatle:64'}

# Size in bytes of all the pack codes.
PACK_CODE_SIZE = {'b': 1, 'B': 1, 'h': 2, 'H': 2, 'l': 4, 'L': 4,
                  'q': 8, 'Q': 8, 'f': 4, 'd': 8}

# Maps a token name or literal prefix to the keyword initialiser it stands for.
_tokenname_to_initialiser = {'hex': 'hex', '0x': 'hex', '0X': 'hex', 'oct': 'oct',
                             '0o': 'oct', '0O': 'oct', 'bin': 'bin', '0b': 'bin',
                             '0B': 'bin', 'bits': 'auto', 'bytes': 'bytes', 'pad': 'pad'}
509
510
def structparser(token):
    """Parse struct-like format string token into sub-token list.

    A token is only treated as struct-like when it starts with an explicit
    endianness character ('<', '>' or '@'); anything else is returned
    unchanged as a one-element list.
    """
    match = STRUCT_PACK_RE.match(token)
    if not match or match.group('endian') is None:
        return [token]
    endian = match.group('endian')
    # Split the format string into pieces such as 'q' or '4h', then deal
    # with multiplicative factors, 4h -> hhhh etc.
    codes = []
    for piece in STRUCT_SPLIT_RE.findall(match.group('fmt')):
        repeat = int(piece[:-1]) if len(piece) != 1 else 1
        codes.append(piece[-1] * repeat)
    expanded = ''.join(codes)
    if endian == '@':
        # Native endianness
        if byteorder == 'little':
            endian = '<'
        else:
            assert byteorder == 'big'
            endian = '>'
    if endian == '<':
        table = REPLACEMENTS_LE
    else:
        assert endian == '>'
        table = REPLACEMENTS_BE
    return [table[code] for code in expanded]
538
539
# Memo of parsed format strings shared by every call that does not supply its
# own token_cache. Keys are (fmt, keys) and it is capped at CACHE_SIZE entries.
_TOKEN_CACHE = {}


def tokenparser(fmt, keys=None, token_cache=None):
    """Divide the format string into tokens and parse them.

    Return stretchy token and list of [initialiser, length, value]
    initialiser is one of: hex, oct, bin, uint, int, se, ue, 0x, 0o, 0b etc.
    length is None if not known, as is value.

    If the token is in the keyword dictionary (keys) then it counts as a
    special case and isn't messed with.

    tokens must be of the form: [factor*][initialiser][:][length][=value]

    fmt -- the format string.
    keys -- optional hashable collection of keyword names that may appear as
            whole tokens or as lengths.
    token_cache -- optional dict used to memoise results; when omitted a
                   module-level cache is used so that repeated format
                   strings are only parsed once.

    Raises ValueError if a token or its length cannot be understood.

    """
    if token_cache is None:
        # A fresh dict per call would never produce a cache hit.
        token_cache = _TOKEN_CACHE
    token_key = (fmt, keys)
    try:
        stretchy_token, cached_values = token_cache[token_key]
    except KeyError:
        pass
    else:
        # Hand out a copy so that callers cannot corrupt the cached entry.
        return stretchy_token, list(cached_values)
    # Very inefficient expanding of brackets.
    fmt = expand_brackets(fmt)
    # Split tokens by ',' and remove whitespace
    # The meta_tokens can either be ordinary single tokens or multiple
    # struct-format token strings.
    meta_tokens = (''.join(f.split()) for f in fmt.split(','))
    return_values = []
    stretchy_token = False
    for meta_token in meta_tokens:
        # See if it has a multiplicative factor
        m = MULTIPLICATIVE_RE.match(meta_token)
        if not m:
            factor = 1
        else:
            factor = int(m.group('factor'))
            meta_token = m.group('token')
        # See if it's a struct-like format
        tokens = structparser(meta_token)
        ret_vals = []
        for token in tokens:
            if keys and token in keys:
                # Don't bother parsing it, it's a keyword argument
                ret_vals.append([token, None, None])
                continue
            value = length = None
            if token == '':
                continue
            # Match literal tokens of the form 0x... 0o... and 0b...
            m = LITERAL_RE.match(token)
            if m:
                name = m.group('name')
                value = m.group('value')
                ret_vals.append([name, length, value])
                continue
            # Match everything else:
            m1 = TOKEN_RE.match(token)
            if m1:
                name = m1.group('name')
                length = m1.group('len')
                if m1.group('value'):
                    value = m1.group('value')
            else:
                # and if you don't specify a 'name' then the default is 'uint':
                m2 = DEFAULT_UINT.match(token)
                if not m2:
                    raise ValueError("Don't understand token '{0}'.".format(token))
                name = 'uint'
                length = m2.group('len')
                if m2.group('value'):
                    value = m2.group('value')
            if name == 'bool':
                if length is not None:
                    raise ValueError("You can't specify a length with bool tokens - they are always one bit.")
                length = 1
            if length is None and name not in ('se', 'ue', 'sie', 'uie'):
                stretchy_token = True
            if length is not None:
                # Try converting length to int, otherwise check it's a key.
                try:
                    length = int(length)
                except ValueError:
                    if not keys or length not in keys:
                        raise ValueError("Don't understand length '{0}' of token.".format(length))
                else:
                    if length < 0:
                        raise ValueError("Can't read a token with a negative length.")
                    # For the 'bytes' token convert length to bits.
                    if name == 'bytes':
                        length *= 8
            ret_vals.append([name, length, value])
        # This multiplies by the multiplicative factor, but this means that
        # we can't allow keyword values as multipliers (e.g. n*uint:8).
        # The only way to do this would be to return the factor in some fashion
        # (we can't use the key's value here as it would mean that we couldn't
        # sensibly continue to cache the function's results. (TODO).
        return_values.extend(ret_vals * factor)
    return_values = [tuple(x) for x in return_values]
    if len(token_cache) < CACHE_SIZE:
        token_cache[token_key] = stretchy_token, list(return_values)
    return stretchy_token, return_values
642
643
# Looks for first number*(
BRACKET_RE = re.compile(r'(?P<factor>\d+)\*\(')


# NOTE(review): the def line was missing from the reviewed copy of this file;
# the name is taken from the expand_brackets(fmt) call in tokenparser.
def expand_brackets(s):
    """Remove whitespace and expand all brackets.

    A bare group '(a,b)' is replaced by its contents, and a multiplied
    group 'n*(a,b)' by n comma-separated copies of its contents. Groups may
    be nested.

    Raises ValueError for unbalanced parentheses, or if a '*(' is not
    preceded by a decimal factor.
    """
    s = ''.join(s.split())
    while True:
        start = s.find('(')
        if start == -1:
            break
        count = 1  # Number of hanging open brackets
        p = start + 1
        while p < len(s):
            if s[p] == '(':
                count += 1
            if s[p] == ')':
                count -= 1
            if not count:
                break
            p += 1
        if count:
            raise ValueError("Unbalanced parenthesis in '{0}'.".format(s))
        inner = s[start + 1:p]
        if start == 0 or s[start - 1] != '*':
            s = s[0:start] + inner + s[p + 1:]
        else:
            m = BRACKET_RE.search(s)
            # The factor must belong to the bracket being expanded. Accepting
            # a match further along the string would splice the text in the
            # wrong place and never consume this bracket.
            if m is None or m.end() != start + 1:
                raise ValueError("Failed to parse '{0}'.".format(s))
            factor = int(m.group('factor'))
            matchstart = m.start('factor')
            s = s[0:matchstart] + (factor - 1) * (inner + ',') + inner + s[p + 1:]
    return s
678
679
# Lookup table giving the 3-character binary string for each octal digit.
OCT_TO_BITS = [format(digit, '03b') for digit in xrange(8)]

# Lookup table giving, for each byte value, how many of its bits are set.
BIT_COUNT = dict((byte, bin(byte).count('1')) for byte in xrange(256))
685
686
687class Bits(object):
688 """A container holding an immutable sequence of bits.
689
690 For a mutable container use the BitArray class instead.
691
692 Methods:
693
694 all() -- Check if all specified bits are set to 1 or 0.
695 any() -- Check if any of specified bits are set to 1 or 0.
696 count() -- Count the number of bits set to 1 or 0.
697 cut() -- Create generator of constant sized chunks.
698 endswith() -- Return whether the bitstring ends with a sub-string.
699 find() -- Find a sub-bitstring in the current bitstring.
700 findall() -- Find all occurrences of a sub-bitstring in the current bitstring.
701 join() -- Join bitstrings together using current bitstring.
702 rfind() -- Seek backwards to find a sub-bitstring.
703 split() -- Create generator of chunks split by a delimiter.
704 startswith() -- Return whether the bitstring starts with a sub-bitstring.
705 tobytes() -- Return bitstring as bytes, padding if needed.
706 tofile() -- Write bitstring to file, padding if needed.
707 unpack() -- Interpret bits using format string.
708
709 Special methods:
710
711 Also available are the operators [], ==, !=, +, *, ~, <<, >>, &, |, ^.
712
713 Properties:
714
715 bin -- The bitstring as a binary string.
716 bool -- For single bit bitstrings, interpret as True or False.
717 bytes -- The bitstring as a bytes object.
718 float -- Interpret as a floating point number.
719 floatbe -- Interpret as a big-endian floating point number.
720 floatle -- Interpret as a little-endian floating point number.
721 floatne -- Interpret as a native-endian floating point number.
722 hex -- The bitstring as a hexadecimal string.
723 int -- Interpret as a two's complement signed integer.
724 intbe -- Interpret as a big-endian signed integer.
725 intle -- Interpret as a little-endian signed integer.
726 intne -- Interpret as a native-endian signed integer.
727 len -- Length of the bitstring in bits.
728 oct -- The bitstring as an octal string.
729 se -- Interpret as a signed exponential-Golomb code.
730 ue -- Interpret as an unsigned exponential-Golomb code.
731 sie -- Interpret as a signed interleaved exponential-Golomb code.
732 uie -- Interpret as an unsigned interleaved exponential-Golomb code.
733 uint -- Interpret as a two's complement unsigned integer.
734 uintbe -- Interpret as a big-endian unsigned integer.
735 uintle -- Interpret as a little-endian unsigned integer.
736 uintne -- Interpret as a native-endian unsigned integer.
737
738 """
739
740 __slots__ = ('_datastore')
741
    def __init__(self, auto=None, length=None, offset=None, **kwargs):
        """Either specify an 'auto' initialiser:
        auto -- a string of comma separated tokens, an integer, a file object,
                a bytearray, a boolean iterable, an array or another bitstring.

        Or initialise via **kwargs with one (and only one) of:
        bytes -- raw data as a string, for example read from a binary file.
        bin -- binary string representation, e.g. '0b001010'.
        hex -- hexadecimal string representation, e.g. '0x2ef'
        oct -- octal string representation, e.g. '0o777'.
        uint -- an unsigned integer.
        int -- a signed integer.
        float -- a floating point number.
        uintbe -- an unsigned big-endian whole byte integer.
        intbe -- a signed big-endian whole byte integer.
        floatbe -- a big-endian floating point number.
        uintle -- an unsigned little-endian whole byte integer.
        intle -- a signed little-endian whole byte integer.
        floatle -- a little-endian floating point number.
        uintne -- an unsigned native-endian whole byte integer.
        intne -- a signed native-endian whole byte integer.
        floatne -- a native-endian floating point number.
        se -- a signed exponential-Golomb code.
        ue -- an unsigned exponential-Golomb code.
        sie -- a signed interleaved exponential-Golomb code.
        uie -- an unsigned interleaved exponential-Golomb code.
        bool -- a boolean (True or False).
        filename -- a file which will be opened in binary read-only mode.

        Other keyword arguments:
        length -- length of the bitstring in bits, if needed and appropriate.
                  It must be supplied for all integer and float initialisers.
        offset -- bit offset to the data. These offset bits are
                  ignored and this is mainly intended for use when
                  initialising using 'bytes' or 'filename'.

        This method has no body of its own: the arguments are consumed by
        __new__, which performs the actual initialisation.

        """
779
780 def __new__(cls, auto=None, length=None, offset=None, _cache=None, **kwargs):
781 # For instances auto-initialised with a string we intern the
782 # instance for re-use.
783 if _cache is None:
784 _cache = {}
785 try:
786 if isinstance(auto, basestring):
787 try:
788 return _cache[auto]
789 except KeyError:
790 x = object.__new__(Bits)
791 try:
792 _, tokens = tokenparser(auto)
793 except ValueError as e:
794 raise CreationError(*e.args)
795 x._datastore = ConstByteStore(bytearray(0), 0, 0)
796 for token in tokens:
797 x._datastore._appendstore(Bits._init_with_token(*token)._datastore)
798 assert x._assertsanity()
799 if len(_cache) < CACHE_SIZE:
800 _cache[auto] = x
801 return x
802 if isinstance(auto, Bits):
803 return auto
804 except TypeError:
805 pass
806 x = super(Bits, cls).__new__(cls)
807 x._initialise(auto, length, offset, **kwargs)
808 return x
809
810 def _initialise(self, auto, length, offset, **kwargs):
811 if length is not None and length < 0:
812 raise CreationError("bitstring length cannot be negative.")
813 if offset is not None and offset < 0:
814 raise CreationError("offset must be >= 0.")
815 if auto is not None:
816 self._initialise_from_auto(auto, length, offset)
817 return
818 if not kwargs:
819 # No initialisers, so initialise with nothing or zero bits
820 if length is not None and length != 0:
821 data = bytearray((length + 7) // 8)
822 self._setbytes_unsafe(data, length, 0)
823 return
824 self._setbytes_unsafe(bytearray(0), 0, 0)
825 return
826 k, v = kwargs.popitem()
827 try:
828 init_without_length_or_offset[k](self, v)
829 if length is not None or offset is not None:
830 raise CreationError("Cannot use length or offset with this initialiser.")
831 except KeyError:
832 try:
833 init_with_length_only[k](self, v, length)
834 if offset is not None:
835 raise CreationError("Cannot use offset with this initialiser.")
836 except KeyError:
837 if offset is None:
838 offset = 0
839 try:
840 init_with_length_and_offset[k](self, v, length, offset)
841 except KeyError:
842 raise CreationError("Unrecognised keyword '{0}' used to initialise.", k)
843
844 def _initialise_from_auto(self, auto, length, offset):
845 if offset is None:
846 offset = 0
847 self._setauto(auto, length, offset)
848 return
849
    def __copy__(self):
        """Return a new copy of the Bits for the copy module.

        No new object is made: the same instance is handed back.
        """
        # Note that if you want a new copy (different ID), use _copy instead.
        # The copy can return self as it's immutable.
        return self
855
856 def __lt__(self, other):
857 raise TypeError("unorderable type: {0}".format(type(self).__name__))
858
859 def __gt__(self, other):
860 raise TypeError("unorderable type: {0}".format(type(self).__name__))
861
862 def __le__(self, other):
863 raise TypeError("unorderable type: {0}".format(type(self).__name__))
864
865 def __ge__(self, other):
866 raise TypeError("unorderable type: {0}".format(type(self).__name__))
867
868 def __add__(self, bs):
869 """Concatenate bitstrings and return new bitstring.
870
871 bs -- the bitstring to append.
872
873 """
874 bs = Bits(bs)
875 if bs.len <= self.len:
876 s = self._copy()
877 s._append(bs)
878 else:
879 s = bs._copy()
880 s = self.__class__(s)
881 s._prepend(self)
882 return s
883
884 def __radd__(self, bs):
885 """Append current bitstring to bs and return new bitstring.
886
887 bs -- the string for the 'auto' initialiser that will be appended to.
888
889 """
890 bs = self._converttobitstring(bs)
891 return bs.__add__(self)
892
893 def __getitem__(self, key):
894 """Return a new bitstring representing a slice of the current bitstring.
895
896 Indices are in units of the step parameter (default 1 bit).
897 Stepping is used to specify the number of bits in each item.
898
899 >>> print BitArray('0b00110')[1:4]
900 '0b011'
901 >>> print BitArray('0x00112233')[1:3:8]
902 '0x1122'
903
904 """
905 length = self.len
906 try:
907 step = key.step if key.step is not None else 1
908 except AttributeError:
909 # single element
910 if key < 0:
911 key += length
912 if not 0 <= key < length:
913 raise IndexError("Slice index out of range.")
914 # Single bit, return True or False
915 return self._datastore.getbit(key)
916 else:
917 if step != 1:
918 # convert to binary string and use string slicing
919 bs = self.__class__()
920 bs._setbin_unsafe(self._getbin().__getitem__(key))
921 return bs
922 start, stop = 0, length
923 if key.start is not None:
924 start = key.start
925 if key.start < 0:
926 start += stop
927 if key.stop is not None:
928 stop = key.stop
929 if key.stop < 0:
930 stop += length
931 start = max(start, 0)
932 stop = min(stop, length)
933 if start < stop:
934 return self._slice(start, stop)
935 else:
936 return self.__class__()
937
    def __len__(self):
        """Return the length of the bitstring in bits."""
        # Delegates to _getlength().
        return self._getlength()
941
def __str__(self):
    """Return an approximate string representation suitable for printing.

    An empty bitstring gives ''. A length that is a multiple of four bits
    is shown in hexadecimal, and any other length under 32 bits in binary.
    Remaining lengths are shown as hexadecimal followed by the final one
    to three bits in binary. Bitstrings longer than MAX_CHARS * 4 bits are
    cut off after MAX_CHARS hexadecimal digits and end with '...'.

    """
    nbits = self.len
    if not nbits:
        return ''
    if nbits > MAX_CHARS * 4:
        # Too long to show in full, so truncate.
        return '0x' + self._readhex(MAX_CHARS * 4, 0) + '...'
    leftover = nbits % 4
    if not leftover:
        # Whole number of nibbles: pure hex.
        return '0x' + self.hex
    if nbits < 32:
        # Short and not expressible in hex: pure binary.
        return '0b' + self.bin
    # As much as possible in hex, then the last 1, 2 or 3 bits in binary.
    hex_bits = nbits - leftover
    return ''.join(('0x', self._readhex(hex_bits, 0),
                    ', ', '0b',
                    self._readbin(leftover, hex_bits)))
968
def __repr__(self):
    """Return a representation that could be used to recreate the bitstring.

    A bitstring whose raw data is an MmapByteArray is shown by filename
    and length, plus an offset when one is set. Otherwise the output of
    __str__() is used, and if that was truncated the real length is added
    as a trailing comment.

    """
    cls_name = self.__class__.__name__
    nbits = self.len
    raw = self._datastore._rawarray
    if not isinstance(raw, MmapByteArray):
        text = self.__str__()
        suffix = ''
        if text.endswith('...'):
            suffix = " # length={0}".format(nbits)
        return "{0}('{1}'){2}".format(cls_name, text, suffix)
    offset_part = ''
    if self._datastore.byteoffset or self._offset:
        offset_part = ", offset=%d" % (raw.byteoffset * 8 + self._offset)
    length_part = ", length=%d" % nbits
    return "{0}(filename='{1}'{2}{3})".format(cls_name, raw.source.name,
                                              length_part, offset_part)
989
def __eq__(self, bs):
    """Return True if two bitstrings have the same binary representation.

    bs -- the object to compare with; it is first converted with Bits(bs).

    An object that cannot be converted to a bitstring compares unequal
    instead of raising an exception.

    >>> BitArray('0b1110') == '0xe'
    True

    """
    try:
        other = Bits(bs)
    except (TypeError, ValueError, Error):
        # An equality test should not raise just because the other operand
        # is not a valid initialiser (wrong type or unparsable string).
        return False
    return equal(self._datastore, other._datastore)
1002
1003 def __ne__(self, bs):
1004 """Return False if two bitstrings have the same binary representation.
1005
1006 >>> BitArray('0b111') == '0x7'
1007 False
1008
1009 """
1010 return not self.__eq__(bs)
1011
1012 def __invert__(self):
1013 """Return bitstring with every bit inverted.
1014
1015 Raises Error if the bitstring is empty.
1016
1017 """
1018 if not self.len:
1019 raise Error("Cannot invert empty bitstring.")
1020 s = self._copy()
1021 s._invert_all()
1022 return s
1023
1024 def __lshift__(self, n):
1025 """Return bitstring with bits shifted by n to the left.
1026
1027 n -- the number of bits to shift. Must be >= 0.
1028
1029 """
1030 if n < 0:
1031 raise ValueError("Cannot shift by a negative amount.")
1032 if not self.len:
1033 raise ValueError("Cannot shift an empty bitstring.")
1034 n = min(n, self.len)
1035 s = self._slice(n, self.len)
1036 s._append(Bits(n))
1037 return s
1038
1039 def __rshift__(self, n):
1040 """Return bitstring with bits shifted by n to the right.
1041
1042 n -- the number of bits to shift. Must be >= 0.
1043
1044 """
1045 if n < 0:
1046 raise ValueError("Cannot shift by a negative amount.")
1047 if not self.len:
1048 raise ValueError("Cannot shift an empty bitstring.")
1049 if not n:
1050 return self._copy()
1051 s = self.__class__(length=min(n, self.len))
1052 s._append(self[:-n])
1053 return s
1054
1055 def __mul__(self, n):
1056 """Return bitstring consisting of n concatenations of self.
1057
1058 Called for expression of the form 'a = b*3'.
1059 n -- The number of concatenations. Must be >= 0.
1060
1061 """
1062 if n < 0:
1063 raise ValueError("Cannot multiply by a negative integer.")
1064 if not n:
1065 return self.__class__()
1066 s = self._copy()
1067 s._imul(n)
1068 return s
1069
1070 def __rmul__(self, n):
1071 """Return bitstring consisting of n concatenations of self.
1072
1073 Called for expressions of the form 'a = 3*b'.
1074 n -- The number of concatenations. Must be >= 0.
1075
1076 """
1077 return self.__mul__(n)
1078
def __and__(self, bs):
    """Bit-wise 'and' between two bitstrings. Returns new bitstring.

    bs -- The bitstring to '&' with; it is converted with Bits(bs).

    Raises ValueError if the two bitstrings have differing lengths.

    """
    other = Bits(bs)
    if other.len != self.len:
        raise ValueError("Bitstrings must have the same length "
                         "for & operator.")
    # Work on a copy so that self is left untouched.
    result = self._copy()
    result._iand(other)
    return result
1094
1095 def __rand__(self, bs):
1096 """Bit-wise 'and' between two bitstrings. Returns new bitstring.
1097
1098 bs -- the bitstring to '&' with.
1099
1100 Raises ValueError if the two bitstrings have differing lengths.
1101
1102 """
1103 return self.__and__(bs)
1104
def __or__(self, bs):
    """Bit-wise 'or' between two bitstrings. Returns new bitstring.

    bs -- The bitstring to '|' with; it is converted with Bits(bs).

    Raises ValueError if the two bitstrings have differing lengths.

    """
    other = Bits(bs)
    if other.len != self.len:
        raise ValueError("Bitstrings must have the same length "
                         "for | operator.")
    # Work on a copy so that self is left untouched.
    result = self._copy()
    result._ior(other)
    return result
1120
1121 def __ror__(self, bs):
1122 """Bit-wise 'or' between two bitstrings. Returns new bitstring.
1123
1124 bs -- The bitstring to '|' with.
1125
1126 Raises ValueError if the two bitstrings have differing lengths.
1127
1128 """
1129 return self.__or__(bs)
1130
def __xor__(self, bs):
    """Bit-wise 'xor' between two bitstrings. Returns new bitstring.

    bs -- The bitstring to '^' with; it is converted with Bits(bs).

    Raises ValueError if the two bitstrings have differing lengths.

    """
    other = Bits(bs)
    if other.len != self.len:
        raise ValueError("Bitstrings must have the same length "
                         "for ^ operator.")
    # Work on a copy so that self is left untouched.
    result = self._copy()
    result._ixor(other)
    return result
1146
1147 def __rxor__(self, bs):
1148 """Bit-wise 'xor' between two bitstrings. Returns new bitstring.
1149
1150 bs -- The bitstring to '^' with.
1151
1152 Raises ValueError if the two bitstrings have differing lengths.
1153
1154 """
1155 return self.__xor__(bs)
1156
def __contains__(self, bs):
    """Return whether bs is contained in the current bitstring.

    bs -- The bitstring to search for.

    The search is not byte aligned. If the object has a _pos attribute
    its value is put back after the search, so 'in' doesn't change it.

    """
    # Remember pos so it can be restored. Objects without a _pos attribute
    # must skip the restore: referencing an unbound 'pos' would raise
    # UnboundLocalError, which 'except AttributeError' does not catch.
    try:
        pos = self._pos
    except AttributeError:
        has_pos = False
    else:
        has_pos = True
    found = Bits.find(self, bs, bytealigned=False)
    if has_pos:
        try:
            self._pos = pos
        except AttributeError:
            pass
    return bool(found)
1174
1175 def __hash__(self):
1176 """Return an integer hash of the object."""
1177 # We can't in general hash the whole bitstring (it could take hours!)
1178 # So instead take some bits from the start and end.
1179 if self.len <= 160:
1180 # Use the whole bitstring.
1181 shorter = self
1182 else:
1183 # Take 10 bytes from start and end
1184 shorter = self[:80] + self[-80:]
1185 h = 0
1186 for byte in shorter.tobytes():
1187 try:
1188 h = (h << 4) + ord(byte)
1189 except TypeError:
1190 # Python 3
1191 h = (h << 4) + byte
1192 g = h & 0xf0000000
1193 if g & (1 << 31):
1194 h ^= (g >> 24)
1195 h ^= g
1196 return h % 1442968193
1197
1198 # This is only used in Python 2.x...
1199 def __nonzero__(self):
1200 """Return True if any bits are set to 1, otherwise return False."""
1201 return self.any(True)
1202
1203 # ...whereas this is used in Python 3.x
1204 __bool__ = __nonzero__
1205
1206 def _assertsanity(self):
1207 """Check internal self consistency as a debugging aid."""
1208 assert self.len >= 0
1209 assert 0 <= self._offset, "offset={0}".format(self._offset)
1210 assert (self.len + self._offset + 7) // 8 == self._datastore.bytelength + self._datastore.byteoffset
1211 return True
1212
@classmethod
def _init_with_token(cls, name, token_length, value):
    """Create a bitstring from a single token.

    name -- the token name, for example 'uint', 'bool' or 'pad'.
    token_length -- the token length (converted with int()), or None.
    value -- the value to initialise with, or None if there isn't one.

    Raises ValueError if a value is needed but is None.
    Raises CreationError if the name or a bool value can't be parsed, or if
    the result's length differs from token_length.

    """
    if token_length is not None:
        token_length = int(token_length)
    if token_length == 0:
        return cls()
    if name == 'pad':
        # A pad token is just token_length zero bits.
        return cls(token_length)

    if value is None:
        if token_length is None:
            error = "Token has no value ({0}=???).".format(name)
        else:
            error = "Token has no value ({0}:{1}=???).".format(name, token_length)
        raise ValueError(error)
    try:
        b = cls(**{_tokenname_to_initialiser[name]: value})
    except KeyError:
        # No direct initialiser, so work out the keyword arguments by hand.
        if name in ('se', 'ue', 'sie', 'uie'):
            kwargs = {name: int(value)}
        elif name in ('uint', 'int', 'uintbe', 'intbe', 'uintle', 'intle', 'uintne', 'intne'):
            kwargs = {name: int(value), 'length': token_length}
        elif name in ('float', 'floatbe', 'floatle', 'floatne'):
            kwargs = {name: float(value), 'length': token_length}
        elif name == 'bool':
            if value in (1, 'True', '1'):
                kwargs = {'bool': True}
            elif value in (0, 'False', '0'):
                kwargs = {'bool': False}
            else:
                raise CreationError("bool token can only be 'True' or 'False'.")
        else:
            raise CreationError("Can't parse token name {0}.", name)
        b = cls(**kwargs)
    if token_length is not None and b.len != token_length:
        msg = "Token with length {0} packed with value of length {1} ({2}:{3}={4})."
        raise CreationError(msg, token_length, b.len, name, token_length, value)
    return b
1251
def _clear(self):
    """Reset the bitstring to an empty state."""
    no_data = bytearray(0)
    self._datastore = ByteStore(no_data)
1255
def _setauto(self, s, length, offset):
    """Set bitstring from a bitstring, file, bool, integer, array, iterable or string.

    s -- the object to initialise from.
    length -- length in bits, or None. Only allowed for bitstring and file
              sources.
    offset -- offset in bits. Only allowed for bitstring and file sources.

    Raises CreationError if length or offset is given for a source that
    doesn't support it, if a file is too short, or if an integer s is
    negative.
    Raises TypeError if s is of an unsupported type.

    """
    # As s can be so many different things it's important to do the checks
    # in the correct order, as some types are also other allowed types.
    # So basestring must be checked before Iterable
    # and bytes/bytearray before Iterable but after basestring!
    if isinstance(s, Bits):
        if length is None:
            length = s.len - offset
        self._setbytes_unsafe(s._datastore.rawbytes, length, s._offset + offset)
        return
    if isinstance(s, file):
        if offset is None:
            offset = 0
        if length is None:
            length = os.path.getsize(s.name) * 8 - offset
        byteoffset, offset = divmod(offset, 8)
        bytelength = (length + byteoffset * 8 + offset + 7) // 8 - byteoffset
        m = MmapByteArray(s, bytelength, byteoffset)
        if length + byteoffset * 8 + offset > m.filelength * 8:
            raise CreationError("File is not long enough for specified "
                                "length and offset.")
        self._datastore = ConstByteStore(m, length, offset)
        return
    # None of the remaining source types accept length or offset.
    if length is not None:
        raise CreationError("The length keyword isn't applicable to this initialiser.")
    if offset:
        raise CreationError("The offset keyword isn't applicable to this initialiser.")
    if isinstance(s, basestring):
        bs = self._converttobitstring(s)
        assert bs._offset == 0
        self._setbytes_unsafe(bs._datastore.rawbytes, bs.length, 0)
        return
    if isinstance(s, (bytes, bytearray)):
        self._setbytes_unsafe(bytearray(s), len(s) * 8, 0)
        return
    if isinstance(s, array.array):
        try:
            b = s.tobytes()
        except AttributeError:
            # Python 2: array.array only provides tostring(), which in turn
            # was removed in Python 3.9.
            b = s.tostring()
        self._setbytes_unsafe(bytearray(b), len(b) * 8, 0)
        return
    if isinstance(s, numbers.Integral):
        # Initialise with s zero bits.
        if s < 0:
            msg = "Can't create bitstring of negative length {0}."
            raise CreationError(msg, s)
        data = bytearray((s + 7) // 8)
        self._datastore = ByteStore(data, s, 0)
        return
    if isinstance(s, collections.abc.Iterable):
        # Evaluate each item as True or False and set bits to 1 or 0.
        self._setbin_unsafe(''.join(str(int(bool(x))) for x in s))
        return
    raise TypeError("Cannot initialise bitstring from {0}.".format(type(s)))
1309
def _setfile(self, filename, length, offset):
    """Use file as source of bits.

    filename -- the name of the file to open (in binary read mode).
    length -- the number of bits to use, or None to use up to the end of
              the file.
    offset -- the bit offset of the first bit to use, or None for 0.

    Raises CreationError if the file is too short for length and offset.

    """
    source = open(filename, 'rb')
    try:
        if offset is None:
            offset = 0
        if length is None:
            length = os.path.getsize(source.name) * 8 - offset
        byteoffset, offset = divmod(offset, 8)
        bytelength = (length + byteoffset * 8 + offset + 7) // 8 - byteoffset
        m = MmapByteArray(source, bytelength, byteoffset)
        if length + byteoffset * 8 + offset > m.filelength * 8:
            raise CreationError("File is not long enough for specified "
                                "length and offset.")
    except Exception:
        # The bitstring never takes ownership of the file on failure, so
        # close it here rather than leaking the open handle.
        source.close()
        raise
    # On success the file object is handed to the MmapByteArray and stays open.
    self._datastore = ConstByteStore(m, length, offset)
1324
def _setbytes_safe(self, data, length=None, offset=0):
    """Set the data from anything that bytearray() accepts.

    data -- the source of the bytes; a bytearray is made from it.
    length -- the length in bits, or None to use everything after offset.
    offset -- the bit offset of the first bit to use.

    Raises CreationError if length + offset is more than the data holds.

    """
    data = bytearray(data)
    available = len(data) * 8
    if length is None:
        # Use to the end of the data.
        self._datastore = ByteStore(data, available - offset, offset)
        return
    if length + offset > available:
        msg = "Not enough data present. Need {0} bits, have {1}."
        raise CreationError(msg, length + offset, available)
    if length == 0:
        self._datastore = ByteStore(bytearray(0))
    else:
        self._datastore = ByteStore(data, length, offset)
1340
def _setbytes_unsafe(self, data, length, offset):
    """Unchecked version of _setbytes_safe.

    A full slice of data is stored together with the bit length and offset.

    """
    whole = data[:]
    self._datastore = ByteStore(whole, length, offset)
    assert self._assertsanity()
1345
1346 def _readbytes(self, length, start):
1347 """Read bytes and return them. Note that length is in bits."""
1348 assert length % 8 == 0
1349 assert start + length <= self.len
1350 if not (start + self._offset) % 8:
1351 return bytes(self._datastore.getbyteslice((start + self._offset) // 8,
1352 (start + self._offset + length) // 8))
1353 return self._slice(start, start + length).tobytes()
1354
1355 def _getbytes(self):
1356 """Return the data as an ordinary string."""
1357 if self.len % 8:
1358 raise InterpretError("Cannot interpret as bytes unambiguously - "
1359 "not multiple of 8 bits.")
1360 return self._readbytes(self.len, 0)
1361
1362 def _setuint(self, uint, length=None):
1363 """Reset the bitstring to have given unsigned int interpretation."""
1364 try:
1365 if length is None:
1366 # Use the whole length. Deliberately not using .len here.
1367 length = self._datastore.bitlength
1368 except AttributeError:
1369 # bitstring doesn't have a _datastore as it hasn't been created!
1370 pass
1371 # TODO: All this checking code should be hoisted out of here!
1372 if length is None or length == 0:
1373 raise CreationError("A non-zero length must be specified with a "
1374 "uint initialiser.")
1375 if uint >= (1 << length):
1376 msg = "{0} is too large an unsigned integer for a bitstring of length {1}. "\
1377 "The allowed range is [0, {2}]."
1378 raise CreationError(msg, uint, length, (1 << length) - 1)
1379 if uint < 0:
1380 raise CreationError("uint cannot be initialsed by a negative number.")
1381 s = hex(uint)[2:]
1382 s = s.rstrip('L')
1383 if len(s) & 1:
1384 s = '0' + s
1385 try:
1386 data = bytes.fromhex(s)
1387 except AttributeError:
1388 # the Python 2.x way
1389 data = binascii.unhexlify(s)
1390 # Now add bytes as needed to get the right length.
1391 extrabytes = ((length + 7) // 8) - len(data)
1392 if extrabytes > 0:
1393 data = b'\x00' * extrabytes + data
1394 offset = 8 - (length % 8)
1395 if offset == 8:
1396 offset = 0
1397 self._setbytes_unsafe(bytearray(data), length, offset)
1398
1399 def _readuint(self, length, start):
1400 """Read bits and interpret as an unsigned int."""
1401 if not length:
1402 raise InterpretError("Cannot interpret a zero length bitstring "
1403 "as an integer.")
1404 offset = self._offset
1405 startbyte = (start + offset) // 8
1406 endbyte = (start + offset + length - 1) // 8
1407
1408 b = binascii.hexlify(bytes(self._datastore.getbyteslice(startbyte, endbyte + 1)))
1409 assert b
1410 i = int(b, 16)
1411 final_bits = 8 - ((start + offset + length) % 8)
1412 if final_bits != 8:
1413 i >>= final_bits
1414 i &= (1 << length) - 1
1415 return i
1416
1417 def _getuint(self):
1418 """Return data as an unsigned int."""
1419 return self._readuint(self.len, 0)
1420
1421 def _setint(self, int_, length=None):
1422 """Reset the bitstring to have given signed int interpretation."""
1423 # If no length given, and we've previously been given a length, use it.
1424 if length is None and hasattr(self, 'len') and self.len != 0:
1425 length = self.len
1426 if length is None or length == 0:
1427 raise CreationError("A non-zero length must be specified with an int initialiser.")
1428 if int_ >= (1 << (length - 1)) or int_ < -(1 << (length - 1)):
1429 raise CreationError("{0} is too large a signed integer for a bitstring of length {1}. "
1430 "The allowed range is [{2}, {3}].", int_, length, -(1 << (length - 1)),
1431 (1 << (length - 1)) - 1)
1432 if int_ >= 0:
1433 self._setuint(int_, length)
1434 return
1435 # TODO: We should decide whether to just use the _setuint, or to do the bit flipping,
1436 # based upon which will be quicker. If the -ive number is less than half the maximum
1437 # possible then it's probably quicker to do the bit flipping...
1438
1439 # Do the 2's complement thing. Add one, set to minus number, then flip bits.
1440 int_ += 1
1441 self._setuint(-int_, length)
1442 self._invert_all()
1443
1444 def _readint(self, length, start):
1445 """Read bits and interpret as a signed int"""
1446 ui = self._readuint(length, start)
1447 if not ui >> (length - 1):
1448 # Top bit not set, number is positive
1449 return ui
1450 # Top bit is set, so number is negative
1451 tmp = (~(ui - 1)) & ((1 << length) - 1)
1452 return -tmp
1453
1454 def _getint(self):
1455 """Return data as a two's complement signed int."""
1456 return self._readint(self.len, 0)
1457
1458 def _setuintbe(self, uintbe, length=None):
1459 """Set the bitstring to a big-endian unsigned int interpretation."""
1460 if length is not None and length % 8 != 0:
1461 raise CreationError("Big-endian integers must be whole-byte. "
1462 "Length = {0} bits.", length)
1463 self._setuint(uintbe, length)
1464
1465 def _readuintbe(self, length, start):
1466 """Read bits and interpret as a big-endian unsigned int."""
1467 if length % 8:
1468 raise InterpretError("Big-endian integers must be whole-byte. "
1469 "Length = {0} bits.", length)
1470 return self._readuint(length, start)
1471
1472 def _getuintbe(self):
1473 """Return data as a big-endian two's complement unsigned int."""
1474 return self._readuintbe(self.len, 0)
1475
1476 def _setintbe(self, intbe, length=None):
1477 """Set bitstring to a big-endian signed int interpretation."""
1478 if length is not None and length % 8 != 0:
1479 raise CreationError("Big-endian integers must be whole-byte. "
1480 "Length = {0} bits.", length)
1481 self._setint(intbe, length)
1482
1483 def _readintbe(self, length, start):
1484 """Read bits and interpret as a big-endian signed int."""
1485 if length % 8:
1486 raise InterpretError("Big-endian integers must be whole-byte. "
1487 "Length = {0} bits.", length)
1488 return self._readint(length, start)
1489
1490 def _getintbe(self):
1491 """Return data as a big-endian two's complement signed int."""
1492 return self._readintbe(self.len, 0)
1493
1494 def _setuintle(self, uintle, length=None):
1495 if length is not None and length % 8 != 0:
1496 raise CreationError("Little-endian integers must be whole-byte. "
1497 "Length = {0} bits.", length)
1498 self._setuint(uintle, length)
1499 self._reversebytes(0, self.len)
1500
1501 def _readuintle(self, length, start):
1502 """Read bits and interpret as a little-endian unsigned int."""
1503 if length % 8:
1504 raise InterpretError("Little-endian integers must be whole-byte. "
1505 "Length = {0} bits.", length)
1506 assert start + length <= self.len
1507 absolute_pos = start + self._offset
1508 startbyte, offset = divmod(absolute_pos, 8)
1509 val = 0
1510 if not offset:
1511 endbyte = (absolute_pos + length - 1) // 8
1512 chunksize = 4 # for 'L' format
1513 while endbyte - chunksize + 1 >= startbyte:
1514 val <<= 8 * chunksize
1515 val += struct.unpack('<L', bytes(self._datastore.getbyteslice(endbyte + 1 - chunksize, endbyte + 1)))[0]
1516 endbyte -= chunksize
1517 for b in xrange(endbyte, startbyte - 1, -1):
1518 val <<= 8
1519 val += self._datastore.getbyte(b)
1520 else:
1521 data = self._slice(start, start + length)
1522 assert data.len % 8 == 0
1523 data._reversebytes(0, self.len)
1524 for b in bytearray(data.bytes):
1525 val <<= 8
1526 val += b
1527 return val
1528
1529 def _getuintle(self):
1530 return self._readuintle(self.len, 0)
1531
1532 def _setintle(self, intle, length=None):
1533 if length is not None and length % 8 != 0:
1534 raise CreationError("Little-endian integers must be whole-byte. "
1535 "Length = {0} bits.", length)
1536 self._setint(intle, length)
1537 self._reversebytes(0, self.len)
1538
1539 def _readintle(self, length, start):
1540 """Read bits and interpret as a little-endian signed int."""
1541 ui = self._readuintle(length, start)
1542 if not ui >> (length - 1):
1543 # Top bit not set, number is positive
1544 return ui
1545 # Top bit is set, so number is negative
1546 tmp = (~(ui - 1)) & ((1 << length) - 1)
1547 return -tmp
1548
1549 def _getintle(self):
1550 return self._readintle(self.len, 0)
1551
1552 def _setfloat(self, f, length=None):
1553 # If no length given, and we've previously been given a length, use it.
1554 if length is None and hasattr(self, 'len') and self.len != 0:
1555 length = self.len
1556 if length is None or length == 0:
1557 raise CreationError("A non-zero length must be specified with a "
1558 "float initialiser.")
1559 if length == 32:
1560 b = struct.pack('>f', f)
1561 elif length == 64:
1562 b = struct.pack('>d', f)
1563 else:
1564 raise CreationError("floats can only be 32 or 64 bits long, "
1565 "not {0} bits", length)
1566 self._setbytes_unsafe(bytearray(b), length, 0)
1567
1568 def _readfloat(self, length, start):
1569 """Read bits and interpret as a float."""
1570 if not (start + self._offset) % 8:
1571 startbyte = (start + self._offset) // 8
1572 if length == 32:
1573 f, = struct.unpack('>f', bytes(self._datastore.getbyteslice(startbyte, startbyte + 4)))
1574 elif length == 64:
1575 f, = struct.unpack('>d', bytes(self._datastore.getbyteslice(startbyte, startbyte + 8)))
1576 else:
1577 if length == 32:
1578 f, = struct.unpack('>f', self._readbytes(32, start))
1579 elif length == 64:
1580 f, = struct.unpack('>d', self._readbytes(64, start))
1581 try:
1582 return f
1583 except NameError:
1584 raise InterpretError("floats can only be 32 or 64 bits long, not {0} bits", length)
1585
1586 def _getfloat(self):
1587 """Interpret the whole bitstring as a float."""
1588 return self._readfloat(self.len, 0)
1589
1590 def _setfloatle(self, f, length=None):
1591 # If no length given, and we've previously been given a length, use it.
1592 if length is None and hasattr(self, 'len') and self.len != 0:
1593 length = self.len
1594 if length is None or length == 0:
1595 raise CreationError("A non-zero length must be specified with a "
1596 "float initialiser.")
1597 if length == 32:
1598 b = struct.pack('<f', f)
1599 elif length == 64:
1600 b = struct.pack('<d', f)
1601 else:
1602 raise CreationError("floats can only be 32 or 64 bits long, "
1603 "not {0} bits", length)
1604 self._setbytes_unsafe(bytearray(b), length, 0)
1605
1606 def _readfloatle(self, length, start):
1607 """Read bits and interpret as a little-endian float."""
1608 startbyte, offset = divmod(start + self._offset, 8)
1609 if not offset:
1610 if length == 32:
1611 f, = struct.unpack('<f', bytes(self._datastore.getbyteslice(startbyte, startbyte + 4)))
1612 elif length == 64:
1613 f, = struct.unpack('<d', bytes(self._datastore.getbyteslice(startbyte, startbyte + 8)))
1614 else:
1615 if length == 32:
1616 f, = struct.unpack('<f', self._readbytes(32, start))
1617 elif length == 64:
1618 f, = struct.unpack('<d', self._readbytes(64, start))
1619 try:
1620 return f
1621 except NameError:
1622 raise InterpretError("floats can only be 32 or 64 bits long, "
1623 "not {0} bits", length)
1624
1625 def _getfloatle(self):
1626 """Interpret the whole bitstring as a little-endian float."""
1627 return self._readfloatle(self.len, 0)
1628
1629 def _setue(self, i):
1630 """Initialise bitstring with unsigned exponential-Golomb code for integer i.
1631
1632 Raises CreationError if i < 0.
1633
1634 """
1635 if i < 0:
1636 raise CreationError("Cannot use negative initialiser for unsigned "
1637 "exponential-Golomb.")
1638 if not i:
1639 self._setbin_unsafe('1')
1640 return
1641 tmp = i + 1
1642 leadingzeros = -1
1643 while tmp > 0:
1644 tmp >>= 1
1645 leadingzeros += 1
1646 remainingpart = i + 1 - (1 << leadingzeros)
1647 binstring = '0' * leadingzeros + '1' + Bits(uint=remainingpart,
1648 length=leadingzeros).bin
1649 self._setbin_unsafe(binstring)
1650
1651 def _readue(self, pos):
1652 """Return interpretation of next bits as unsigned exponential-Golomb code.
1653
1654 Raises ReadError if the end of the bitstring is encountered while
1655 reading the code.
1656
1657 """
1658 oldpos = pos
1659 try:
1660 while not self[pos]:
1661 pos += 1
1662 except IndexError:
1663 raise ReadError("Read off end of bitstring trying to read code.")
1664 leadingzeros = pos - oldpos
1665 codenum = (1 << leadingzeros) - 1
1666 if leadingzeros > 0:
1667 if pos + leadingzeros + 1 > self.len:
1668 raise ReadError("Read off end of bitstring trying to read code.")
1669 codenum += self._readuint(leadingzeros, pos + 1)
1670 pos += leadingzeros + 1
1671 else:
1672 assert codenum == 0
1673 pos += 1
1674 return codenum, pos
1675
1676 def _getue(self):
1677 """Return data as unsigned exponential-Golomb code.
1678
1679 Raises InterpretError if bitstring is not a single exponential-Golomb code.
1680
1681 """
1682 try:
1683 value, newpos = self._readue(0)
1684 if value is None or newpos != self.len:
1685 raise ReadError
1686 except ReadError:
1687 raise InterpretError("Bitstring is not a single exponential-Golomb code.")
1688 return value
1689
1690 def _setse(self, i):
1691 """Initialise bitstring with signed exponential-Golomb code for integer i."""
1692 if i > 0:
1693 u = (i * 2) - 1
1694 else:
1695 u = -2 * i
1696 self._setue(u)
1697
1698 def _getse(self):
1699 """Return data as signed exponential-Golomb code.
1700
1701 Raises InterpretError if bitstring is not a single exponential-Golomb code.
1702
1703 """
1704 try:
1705 value, newpos = self._readse(0)
1706 if value is None or newpos != self.len:
1707 raise ReadError
1708 except ReadError:
1709 raise InterpretError("Bitstring is not a single exponential-Golomb code.")
1710 return value
1711
1712 def _readse(self, pos):
1713 """Return interpretation of next bits as a signed exponential-Golomb code.
1714
1715 Advances position to after the read code.
1716
1717 Raises ReadError if the end of the bitstring is encountered while
1718 reading the code.
1719
1720 """
1721 codenum, pos = self._readue(pos)
1722 m = (codenum + 1) // 2
1723 if not codenum % 2:
1724 return -m, pos
1725 else:
1726 return m, pos
1727
1728 def _setuie(self, i):
1729 """Initialise bitstring with unsigned interleaved exponential-Golomb code for integer i.
1730
1731 Raises CreationError if i < 0.
1732
1733 """
1734 if i < 0:
1735 raise CreationError("Cannot use negative initialiser for unsigned "
1736 "interleaved exponential-Golomb.")
1737 self._setbin_unsafe('1' if i == 0 else '0' + '0'.join(bin(i + 1)[3:]) + '1')
1738
1739 def _readuie(self, pos):
1740 """Return interpretation of next bits as unsigned interleaved exponential-Golomb code.
1741
1742 Raises ReadError if the end of the bitstring is encountered while
1743 reading the code.
1744
1745 """
1746 try:
1747 codenum = 1
1748 while not self[pos]:
1749 pos += 1
1750 codenum <<= 1
1751 codenum += self[pos]
1752 pos += 1
1753 pos += 1
1754 except IndexError:
1755 raise ReadError("Read off end of bitstring trying to read code.")
1756 codenum -= 1
1757 return codenum, pos
1758
1759 def _getuie(self):
1760 """Return data as unsigned interleaved exponential-Golomb code.
1761
1762 Raises InterpretError if bitstring is not a single exponential-Golomb code.
1763
1764 """
1765 try:
1766 value, newpos = self._readuie(0)
1767 if value is None or newpos != self.len:
1768 raise ReadError
1769 except ReadError:
1770 raise InterpretError("Bitstring is not a single interleaved exponential-Golomb code.")
1771 return value
1772
1773 def _setsie(self, i):
1774 """Initialise bitstring with signed interleaved exponential-Golomb code for integer i."""
1775 if not i:
1776 self._setbin_unsafe('1')
1777 else:
1778 self._setuie(abs(i))
1779 self._append(Bits([i < 0]))
1780
1781 def _getsie(self):
1782 """Return data as signed interleaved exponential-Golomb code.
1783
1784 Raises InterpretError if bitstring is not a single exponential-Golomb code.
1785
1786 """
1787 try:
1788 value, newpos = self._readsie(0)
1789 if value is None or newpos != self.len:
1790 raise ReadError
1791 except ReadError:
1792 raise InterpretError("Bitstring is not a single interleaved exponential-Golomb code.")
1793 return value
1794
1795 def _readsie(self, pos):
1796 """Return interpretation of next bits as a signed interleaved exponential-Golomb code.
1797
1798 Advances position to after the read code.
1799
1800 Raises ReadError if the end of the bitstring is encountered while
1801 reading the code.
1802
1803 """
1804 codenum, pos = self._readuie(pos)
1805 if not codenum:
1806 return 0, pos
1807 try:
1808 if self[pos]:
1809 return -codenum, pos + 1
1810 else:
1811 return codenum, pos + 1
1812 except IndexError:
1813 raise ReadError("Read off end of bitstring trying to read code.")
1814
1815 def _setbool(self, value):
1816 # We deliberately don't want to have implicit conversions to bool here.
1817 # If we did then it would be difficult to deal with the 'False' string.
1818 if value in (1, 'True'):
1819 self._setbytes_unsafe(bytearray(b'\x80'), 1, 0)
1820 elif value in (0, 'False'):
1821 self._setbytes_unsafe(bytearray(b'\x00'), 1, 0)
1822 else:
1823 raise CreationError('Cannot initialise boolean with {0}.', value)
1824
1825 def _getbool(self):
1826 if self.length != 1:
1827 msg = "For a bool interpretation a bitstring must be 1 bit long, not {0} bits."
1828 raise InterpretError(msg, self.length)
1829 return self[0]
1830
1831 def _readbool(self, pos):
1832 return self[pos], pos + 1
1833
def _setbin_safe(self, binstring):
    """Reset the bitstring to the value given in binstring.

    binstring -- a string of binary digits; any '0b' is removed.

    """
    tidied = tidy_input_string(binstring)
    # remove any 0b if present
    self._setbin_unsafe(tidied.replace('0b', ''))
1840
def _setbin_unsafe(self, binstring):
    """Same as _setbin_safe, but input isn't sanity checked. binstring mustn't start with '0b'.

    Raises CreationError if binstring holds anything other than binary digits.

    """
    length = len(binstring)
    # Pad with zeros up to the next byte boundary (adds nothing if the
    # length is already a whole number of bytes).
    boundary = ((length + 7) // 8) * 8
    padded = binstring + '0' * (boundary - length)
    try:
        bytelist = [int(padded[x:x + 8], 2)
                    for x in xrange(0, len(padded), 8)]
    except ValueError:
        raise CreationError("Invalid character in bin initialiser {0}.", binstring)
    self._setbytes_unsafe(bytearray(bytelist), length, 0)
1854
1855 def _readbin(self, length, start):
1856 """Read bits and interpret as a binary string."""
1857 if not length:
1858 return ''
1859 # Get the byte slice containing our bit slice
1860 startbyte, startoffset = divmod(start + self._offset, 8)
1861 endbyte = (start + self._offset + length - 1) // 8
1862 b = self._datastore.getbyteslice(startbyte, endbyte + 1)
1863 # Convert to a string of '0' and '1's (via a hex string an and int!)
1864 try:
1865 c = "{:0{}b}".format(int(binascii.hexlify(b), 16), 8 * len(b))
1866 except TypeError:
1867 # Hack to get Python 2.6 working
1868 c = "{0:0{1}b}".format(int(binascii.hexlify(str(b)), 16), 8 * len(b))
1869 # Finally chop off any extra bits.
1870 return c[startoffset:startoffset + length]
1871
1872 def _getbin(self):
1873 """Return interpretation as a binary string."""
1874 return self._readbin(self.len, 0)
1875
def _setoct(self, octstring):
    """Reset the bitstring to have the value given in octstring.

    octstring -- a string of octal digits; any '0o' is removed.

    Raises CreationError if a character isn't an octal digit.

    """
    octstring = tidy_input_string(octstring)
    # remove any 0o if present
    octstring = octstring.replace('0o', '')
    pieces = []
    for symbol in octstring:
        try:
            digit = int(symbol)
            if not 0 <= digit < 8:
                raise ValueError
            # Each octal digit contributes its own group of bits.
            pieces.append(OCT_TO_BITS[digit])
        except ValueError:
            raise CreationError("Invalid symbol '{0}' in oct initialiser.", symbol)
    self._setbin_unsafe(''.join(pieces))
1890
def _readoct(self, length, start):
    """Read bits and interpret as an octal string.

    length -- the number of bits to read; must be a multiple of 3.
    start -- the bit position to start reading from.

    Raises InterpretError if length is not a multiple of 3.

    """
    if length % 3:
        raise InterpretError("Cannot convert to octal unambiguously - "
                             "not multiple of 3 bits.")
    if not length:
        return ''
    # Get main octal bit by converting from int.
    # Strip starting 0 or 0o depending on Python version.
    digits = oct(self._readuint(length, start))[LEADING_OCT_CHARS:]
    if digits.endswith('L'):
        digits = digits[:-1]
    # Left-pad with zeros so that there is one digit for every three bits.
    padding = '0' * (length // 3 - len(digits))
    return padding + digits
1905
def _getoct(self):
    """Return the entire bitstring as a string of octal digits."""
    whole_length = self.len
    return self._readoct(whole_length, 0)
1909
def _sethex(self, hexstring):
    """Replace the contents with the bits described by the hexadecimal string `hexstring`.

    Every '0x' substring is removed after the input has been tidied.
    Raises CreationError if the remaining text is not valid hexadecimal.
    """
    cleaned = tidy_input_string(hexstring).replace('0x', '')
    nibbles = len(cleaned)
    # fromhex needs whole bytes, so an odd digit count gets a padding '0' nibble.
    padded = cleaned + '0' if nibbles % 2 else cleaned
    try:
        try:
            data = bytearray.fromhex(padded)
        except TypeError:
            # Python 2.6 needs a unicode string (a bug). 2.7 and 3.x work fine.
            data = bytearray.fromhex(unicode(padded))  # noqa
    except ValueError:
        raise CreationError("Invalid symbol in hex initialiser.")
    # Four bits per original digit, so the padding nibble is excluded from the length.
    self._setbytes_unsafe(data, nibbles * 4, 0)
1927
def _readhex(self, length, start):
    """Return `length` bits, beginning at bit `start`, as a string of hex digits.

    Raises InterpretError when `length` is not a multiple of 4.
    """
    if length % 4:
        raise InterpretError("Cannot convert to hex unambiguously - "
                             "not multiple of 4 bits.")
    if not length:
        return ''
    raw = self._slice(start, start + length).tobytes()
    try:
        text = raw.hex()  # Available in Python 3.5
    except AttributeError:
        # Fallback for interpreters whose bytes type has no hex() method.
        text = str(binascii.hexlify(raw).decode('utf-8'))
    # The bytes cover an even number of nibbles; drop the last one when an odd count was requested.
    nibble_count = length // 4
    if nibble_count % 2:
        return text[:-1]
    return text
1944
def _gethex(self):
    """Return the entire bitstring as a string of hexadecimal digits.

    The result is whatever _readhex produces for the full length.
    _readhex raises InterpretError if the length is not a multiple of 4.

    """
    whole_length = self.len
    return self._readhex(whole_length, 0)
1952
def _getoffset(self):
    """Return the `offset` attribute of the underlying datastore."""
    store = self._datastore
    return store.offset
1955
def _getlength(self):
    """Return the bitstring's length in bits, as reported by the datastore."""
    store = self._datastore
    return store.bitlength
1959
1961 """Ensure the data is held in memory, not in a file."""
1962 self._setbytes_unsafe(self._datastore.getbyteslice(0, self._datastore.bytelength),
1963 self.len, self._offset)
1964
@classmethod
def _converttobitstring(cls, bs, offset=0, cache=None):
    """Return `bs` as a bitstring, constructing one when necessary.

    A Bits instance is handed back unchanged. A string is tokenised and
    assembled token by token, with `offset` used as the suggested bit offset
    of the first significant bit (to optimise append etc.); the result is
    stored in `cache` while it holds fewer than CACHE_SIZE entries. Any other
    value is passed to the class constructor.

    Note: when no `cache` is supplied a new empty dict is created for this
    call, so nothing is remembered between such calls.

    Raises CreationError if the string cannot be tokenised.

    """
    if isinstance(bs, Bits):
        return bs
    if cache is None:
        cache = {}
    key = (bs, offset)
    try:
        return cache[key]
    except KeyError:
        if isinstance(bs, basestring):
            built = cls()
            try:
                _, tokens = tokenparser(bs)
            except ValueError as e:
                raise CreationError(*e.args)
            if tokens:
                # The first token fixes the offset; the remaining ones are appended after it.
                built._append(Bits._init_with_token(*tokens[0]))
                built._datastore = offsetcopy(built._datastore, offset)
                for token in tokens[1:]:
                    built._append(Bits._init_with_token(*token))
            assert built._assertsanity()
            assert built.len == 0 or built._offset == offset
            if len(cache) < CACHE_SIZE:
                cache[key] = built
            return built
    except TypeError:
        # Unhashable type
        pass
    return cls(bs)
2000
def _copy(self):
    """Return a new instance of the same class holding a copy of this bitstring's bytes."""
    duplicate = self.__class__()
    store = self._datastore
    raw = store.getbyteslice(0, store.bytelength)
    # Length and offset are carried over unchanged.
    duplicate._setbytes_unsafe(raw, self.len, self._offset)
    return duplicate
2007
def _slice(self, start, end):
    """Return bits [start, end) as a new instance of the same class; no bounds checking is done."""
    if start == end:
        return self.__class__()
    base = self._offset
    # Whole bytes that cover the window, plus the bit offset inside the first one.
    startbyte, newoffset = divmod(start + base, 8)
    endbyte = (end + base - 1) // 8
    piece = self.__class__()
    covering = self._datastore.getbyteslice(startbyte, endbyte + 1)
    piece._setbytes_unsafe(covering, end - start, newoffset)
    return piece
2018
def _readtoken(self, name, pos, length):
    """Read one token called `name` at bit `pos` and return (value, new position).

    Raises ReadError if `length` exceeds the bits remaining after `pos`, and
    ValueError if `name` has no reader and is not 'pad'.
    """
    if length is not None:
        wanted = int(length)
        available = self.length - pos
        if wanted > available:
            raise ReadError("Reading off the end of the data. "
                            "Tried to read {0} bits when only {1} available.".format(wanted, available))
    try:
        reader = name_to_read[name]
        value = reader(self, length, pos)
        return value, pos + length
    except KeyError:
        # A 'pad' token produces no value; it only advances the position.
        if name == 'pad':
            return None, pos + length
        raise ValueError("Can't parse token {0}:{1}".format(name, length))
    except TypeError:
        # This is for the 'ue', 'se' and 'bool' tokens. They will also return the new pos.
        return name_to_read[name](self, pos)
2034
def _append(self, bs):
    """Add the datastore of `bs` to the end of this bitstring's datastore."""
    other_store = bs._datastore
    self._datastore._appendstore(other_store)
2038
def _prepend(self, bs):
    """Add the datastore of `bs` to the front of this bitstring's datastore."""
    other_store = bs._datastore
    self._datastore._prependstore(other_store)
2042
def _reverse(self):
    """Reverse the order of all bits, in place."""
    # Map every byte through BYTE_REVERSAL_DICT, then walk the bytes back to front.
    flipped = [BYTE_REVERSAL_DICT[byte] for byte in self._datastore.rawbytes][::-1]
    # The bits that were unused at the end become the offset at the front.
    trailing = (self._offset + self.len) % 8
    newoffset = 8 - trailing if trailing else 0
    self._setbytes_unsafe(bytearray().join(flipped), self.length, newoffset)
2054
def _truncatestart(self, bits):
    """Remove `bits` bits from the front of the bitstring, in place."""
    assert 0 <= bits <= self.len
    if not bits:
        return
    remaining = self.len - bits
    if not remaining:
        # Everything is being removed.
        self._clear()
        return
    store = self._datastore
    # Keep the bytes from the one holding the new first bit onwards.
    bytepos, offset = divmod(self._offset + bits, 8)
    self._setbytes_unsafe(store.getbyteslice(bytepos, store.bytelength), remaining, offset)
    assert self._assertsanity()
2067
def _truncateend(self, bits):
    """Remove `bits` bits from the end of the bitstring, in place."""
    assert 0 <= bits <= self.len
    if not bits:
        return
    remaining = self.len - bits
    if not remaining:
        # Everything is being removed.
        self._clear()
        return
    offset = self._offset
    # Number of whole bytes needed to hold the offset plus the surviving bits.
    newlength_in_bytes = (offset + remaining + 7) // 8
    self._setbytes_unsafe(self._datastore.getbyteslice(0, newlength_in_bytes), remaining, offset)
    assert self._assertsanity()
2080
def _insert(self, bs, pos):
    """Insert `bs` at bit position `pos`, in place.

    If the instance accepts a `_pos` attribute it is set to just after the inserted bits.
    """
    total = self.len
    assert 0 <= pos <= total
    if pos <= total // 2:
        # Nearer the start: detach the head, then put bs and the head back on the front.
        head = self._slice(0, pos)
        self._truncatestart(pos)
        self._prepend(bs)
        self._prepend(head)
    else:
        # Nearer the end: detach the tail, then add bs and the tail back on the end.
        tail = self._slice(pos, total)
        self._truncateend(total - pos)
        self._append(bs)
        self._append(tail)
    try:
        self._pos = pos + bs.len
    except AttributeError:
        pass
    assert self._assertsanity()
2101
def _overwrite(self, bs, pos):
    """Replace the bits starting at `pos` with the bits of `bs`, in place."""
    assert 0 <= pos < self.len
    if bs is self:
        # Just overwriting with self, so do nothing.
        assert pos == 0
        return
    store = self._datastore
    abs_start = self._offset + pos
    abs_end = abs_start + bs.len
    bytepos, bitoffset = divmod(abs_start, 8)
    lastbytepos = (abs_end - 1) // 8
    single_byte = bytepos == lastbytepos
    if single_byte:
        # Mask covering only the bs.len bits that sit inside this one byte.
        mask = ((1 << bs.len) - 1) << (8 - bs.len - bitoffset)
    else:
        # Mask covering everything from bitoffset to the end of the first byte.
        mask = (1 << (8 - bitoffset)) - 1
    # Clear the target bits, then OR in the matching bits of an offset-aligned copy of bs.
    store.setbyte(bytepos, store.getbyte(bytepos) & (~mask))
    d = offsetcopy(bs._datastore, bitoffset)
    store.setbyte(bytepos, store.getbyte(bytepos) | (d.getbyte(0) & mask))
    if not single_byte:
        # Now do all the full bytes
        store.setbyteslice(bytepos + 1, lastbytepos, d.getbyteslice(1, lastbytepos - bytepos))
        # and finally the last byte
        bitsleft = abs_end % 8 or 8
        tailmask = (1 << (8 - bitsleft)) - 1
        store.setbyte(lastbytepos, store.getbyte(lastbytepos) & tailmask)
        store.setbyte(lastbytepos,
                      store.getbyte(lastbytepos) | (d.getbyte(d.bytelength - 1) & ~tailmask))
    assert self._assertsanity()
2134
def _delete(self, bits, pos):
    """Remove `bits` bits starting at bit position `pos`, in place."""
    total = self.len
    assert 0 <= pos <= total
    assert pos + bits <= total
    if not pos:
        # Cutting bits off at the start.
        self._truncatestart(bits)
        return
    if pos + bits == total:
        # Cutting bits off at the end.
        self._truncateend(bits)
        return
    bits_after = total - pos - bits
    if pos > bits_after:
        # More bits before the cut than after it: detach the tail, trim, then re-attach it.
        tail = self._slice(pos + bits, total)
        assert total - pos > 0
        self._truncateend(total - pos)
        self._append(tail)
    else:
        # More bits after the cut than before it: detach the head, trim, then re-attach it.
        head = self._slice(0, pos)
        self._truncatestart(pos + bits)
        self._prepend(head)
2160
def _reversebytes(self, start, end):
    """Reverse the order of the bytes between bit positions `start` and `end`, in place."""
    # Pick the offset that puts `start` on a byte boundary.
    # TODO: We could be cleverer here to avoid changing the offset.
    newoffset = (-start) % 8
    self._datastore = offsetcopy(self._datastore, newoffset)
    first_byte = (newoffset + start) // 8
    last_byte = (newoffset + end) // 8
    # With the data aligned, reversing is a plain byte-level operation.
    segment = bytearray(self._datastore.getbyteslice(first_byte, last_byte))
    segment.reverse()
    self._datastore.setbyteslice(first_byte, last_byte, segment)
2173
def _set(self, pos):
    """Turn on (set to 1) the bit at position `pos`."""
    assert 0 <= pos < self.len
    store = self._datastore
    store.setbit(pos)
2178
def _unset(self, pos):
    """Turn off (set to 0) the bit at position `pos`."""
    assert 0 <= pos < self.len
    store = self._datastore
    store.unsetbit(pos)
2183
def _invert(self, pos):
    """Toggle the bit at position `pos` (1 becomes 0, 0 becomes 1)."""
    assert 0 <= pos < self.len
    store = self._datastore
    store.invertbit(pos)
2188
def _invert_all(self):
    """Flip every bit by complementing each stored byte, in place."""
    store = self._datastore
    write_byte = store.setbyte
    read_byte = store.getbyte
    first = store.byteoffset
    for index in xrange(first, first + store.bytelength):
        # 255 - b is the same value as 256 + ~b.
        write_byte(index, 255 - read_byte(index))
2195
def _ilshift(self, n):
    """Shift the bits left by `n` places, in place, and return self.

    Implemented by appending Bits(n) and then removing `n` bits from the start.
    """
    assert 0 < n <= self.len
    filler = Bits(n)
    self._append(filler)
    self._truncatestart(n)
    return self
2202
def _irshift(self, n):
    """Shift the bits right by `n` places, in place, and return self.

    Implemented by prepending Bits(n) and then removing `n` bits from the end.
    """
    assert 0 < n <= self.len
    filler = Bits(n)
    self._prepend(filler)
    self._truncateend(n)
    return self
2209
def _imul(self, n):
    """Make this bitstring hold `n` copies of its current contents, in place; return self."""
    assert n >= 0
    if not n:
        self._clear()
        return self
    unit_len = self.len
    copies = 1
    # Double the contents for as long as doubling stays below the requested count.
    while 2 * copies < n:
        self._append(self)
        copies *= 2
    # Top up with however many whole copies are still missing.
    shortfall = (n - copies) * unit_len
    self._append(self[0:shortfall])
    return self
2223
def _inplace_logical_helper(self, bs, f):
    """Combine this bitstring's bytes with those of `bs` using `f`, in place; return self.

    Shared implementation for the in-place or / and / xor operations.
    Note that the datastore of `bs` may be replaced by a re-aligned copy.
    """
    # Give the two bitstrings the same offset (modulo 8)
    self_byteoffset, self_bitoffset = divmod(self._offset, 8)
    bs_byteoffset, bs_bitoffset = divmod(bs._offset, 8)
    if bs_bitoffset != self_bitoffset:
        if self_bitoffset:
            self._datastore = offsetcopy(self._datastore, bs_bitoffset)
        else:
            bs._datastore = offsetcopy(bs._datastore, 0)
    mine = self._datastore.rawbytes
    theirs = bs._datastore.rawbytes
    for index in xrange(len(mine)):
        mine[index] = f(mine[index + self_byteoffset], theirs[index + bs_byteoffset])
    return self
2239
def _ior(self, bs):
    """Bit-wise OR with `bs`, in place, via the shared logical helper; return its result."""
    combine = operator.ior
    return self._inplace_logical_helper(bs, combine)
2242
def _iand(self, bs):
    """Bit-wise AND with `bs`, in place, via the shared logical helper; return its result."""
    combine = operator.iand
    return self._inplace_logical_helper(bs, combine)
2245
def _ixor(self, bs):
    """Bit-wise XOR with `bs`, in place, via the shared logical helper; return its result."""
    combine = operator.xor
    return self._inplace_logical_helper(bs, combine)
2248
def _readbits(self, length, start):
    """Return `length` bits, beginning at bit `start`, as a new bitstring produced by _slice."""
    stop = start + length
    return self._slice(start, stop)
2252
def _validate_slice(self, start, end):
    """Normalise `start` and `end` to non-negative bit positions and return them.

    None means 0 for `start` and the bitstring length for `end`; negative
    values count back from the end. Raises ValueError if either position
    falls outside the bitstring or if `end` precedes `start`.
    """
    total = self.len
    if start is None:
        start = 0
    elif start < 0:
        start += total
    if end is None:
        end = total
    elif end < 0:
        end += total
    if end < 0 or end > total:
        raise ValueError("end is not a valid position in the bitstring.")
    if start < 0 or start > total:
        raise ValueError("start is not a valid position in the bitstring.")
    if end < start:
        raise ValueError("end must not be less than start.")
    return start, end
2270
def unpack(self, fmt, **kwargs):
    """Interpret the bitstring from its first bit according to fmt and return a list.

    fmt -- A single string or a list of strings with comma separated tokens
           describing how to interpret the bits in the bitstring. Items
           can also be integers, for reading new bitstring of the given length.
    kwargs -- A dictionary or keyword-value pairs - the keywords used in the
              format string will be replaced with their given value.

    Raises ValueError if the format is not understood. If not enough bits
    are available then all bits to the end of the bitstring will be used.

    See the docstring for 'read' for token examples.

    """
    # _readlist hands back (values, end position); only the values are wanted here.
    result = self._readlist(fmt, 0, **kwargs)
    return result[0]
2287
def _readlist(self, fmt, pos, **kwargs):
    """Read the tokens described by `fmt` starting at bit `pos`.

    fmt -- A format string, or a sequence of format strings and/or integers
           (an integer N is treated as the token 'bits:N'). The sequence
           passed in is not modified.
    pos -- The bit position to start reading from.
    kwargs -- Values substituted for keyword names used in the format.

    Returns a (values, new position) pair. Values of None (pad tokens) are
    left out of the list.

    Raises Error if more than one 'filler' (length-less) token is given, or
    if a variable length token follows the filler token.

    """
    if isinstance(fmt, basestring):
        fmt = [fmt]
    # Build a new list rather than assigning into `fmt`, so the caller's
    # sequence is left untouched and tuples are accepted too.
    formats = ["bits:{0}".format(f) if isinstance(f, numbers.Integral) else f
               for f in fmt]
    keys = tuple(sorted(kwargs.keys()))
    tokens = []
    stretchy_token = None
    for f_item in formats:
        stretchy, tkns = tokenparser(f_item, keys)
        if stretchy:
            if stretchy_token:
                raise Error("It's not possible to have more than one 'filler' token.")
            stretchy_token = stretchy
        tokens.extend(tkns)
    if not stretchy_token:
        # Simple case: every token has a known length, so read them in order.
        lst = []
        for name, length, _ in tokens:
            if length in kwargs:
                length = kwargs[length]
                if name == 'bytes':
                    length *= 8
            if name in kwargs and length is None:
                # Using default 'uint' - the name is really the length.
                value, pos = self._readtoken('uint', pos, kwargs[name])
                lst.append(value)
                continue
            value, pos = self._readtoken(name, pos, length)
            if value is not None:  # Don't append pad tokens
                lst.append(value)
        return lst, pos
    # First pass: find the filler token and total up the bits that follow it.
    stretchy_token = False
    bits_after_stretchy_token = 0
    for token in tokens:
        name, length, _ = token
        if length in kwargs:
            length = kwargs[length]
            if name == 'bytes':
                length *= 8
        if name in kwargs and length is None:
            # Default 'uint'.
            length = kwargs[name]
        if stretchy_token:
            if name in ('se', 'ue', 'sie', 'uie'):
                raise Error("It's not possible to parse a variable "
                            "length token after a 'filler' token.")
            else:
                if length is None:
                    raise Error("It's not possible to have more than "
                                "one 'filler' token.")
                bits_after_stretchy_token += length
        if length is None and name not in ('se', 'ue', 'sie', 'uie'):
            assert not stretchy_token
            stretchy_token = token
    # Second pass: read every token, giving the filler whatever bits are left over.
    bits_left = self.len - pos
    return_values = []
    for token in tokens:
        name, length, _ = token
        if token is stretchy_token:
            # Set length to the remaining bits
            length = max(bits_left - bits_after_stretchy_token, 0)
        if length in kwargs:
            length = kwargs[length]
            if name == 'bytes':
                length *= 8
        if name in kwargs and length is None:
            # Default 'uint'
            length = kwargs[name]
        if length is not None:
            bits_left -= length
        value, pos = self._readtoken(name, pos, length)
        if value is not None:
            return_values.append(value)
    return return_values, pos
2364
def _findbytes(self, bytes_, start, end, bytealigned):
    """Byte-level search used when the target is whole bytes and byte aligned.

    Returns a one-item tuple holding the bit position of the first match,
    or an empty tuple when there is none.

    """
    assert self._datastore.offset == 0
    assert bytealigned is True
    needle_len = len(bytes_)
    increment = max(1024, needle_len * 10)
    # Each window overlaps the next by the needle length so matches across a boundary are seen.
    buffersize = increment + needle_len
    finalpos = end // 8
    cursor = (start + 7) // 8  # first whole byte at or after `start`
    while cursor < finalpos:
        # Read in file or from memory in overlapping chunks and search the chunks.
        window = bytearray(self._datastore.getbyteslice(cursor, min(cursor + buffersize, finalpos)))
        hit = window.find(bytes_)
        if hit != -1:
            return ((cursor + hit) * 8,)
        cursor += increment
    return ()
2391
def _findregex(self, reg_ex, start, end, bytealigned):
    """Return the position of the first match of a compiled regular expression.

    The search runs over the '0'/'1' text of the bits between `start` and
    `end`. The result is a one-item tuple, or an empty tuple if nothing matches.

    Note that this doesn't support arbitrary regexes, in particular they
    must match a known length.

    """
    pattern_len = len(reg_ex.pattern)
    # Overlapping chunks of the binary text are searched with an ordinary regex search.
    increment = max(4096, pattern_len * 10)
    buffersize = increment + pattern_len
    cursor = start
    while cursor < end:
        text = self._readbin(min(buffersize, end - cursor), cursor)
        match = reg_ex.search(text)
        if match:
            candidate = cursor + match.start()
            # When bytealigned, only byte-aligned positions are accepted.
            if not bytealigned or candidate % 8 == 0:
                return (candidate,)
            # Advance to just beyond the non-byte-aligned match and try again...
            cursor = candidate + 1
            continue
        cursor += increment
    # Not found, return empty tuple
    return ()
2423
def find(self, bs, start=None, end=None, bytealigned=None):
    """Locate the first occurrence of the bitstring bs.

    Returns a one-item tuple holding the bit position, or an empty tuple
    when bs is not found. On success the bit position (pos property) is
    also moved to the start of the match, where the class has one.

    bs -- The bitstring to find.
    start -- The bit position to start the search. Defaults to 0.
    end -- The bit position one past the last bit to search.
           Defaults to self.len.
    bytealigned -- If True the bitstring will only be
                   found on byte boundaries.

    Raises ValueError if bs is empty, if start < 0, if end > self.len or
    if end < start.

    >>> BitArray('0xc3e').find('0b1111')
    (6,)

    """
    target = Bits(bs)
    if not target.len:
        raise ValueError("Cannot find an empty bitstring.")
    start, end = self._validate_slice(start, end)
    if bytealigned is None:
        # Fall back to the module-level default.
        bytealigned = globals()['bytealigned']
    whole_bytes = bytealigned and not target.len % 8 and not self._datastore.offset
    if whole_bytes:
        result = self._findbytes(target.bytes, start, end, bytealigned)
    else:
        result = self._findregex(re.compile(target._getbin()), start, end, bytealigned)
    # If called from a class that has a pos, set it
    try:
        self._pos = result[0]
    except (AttributeError, IndexError):
        pass
    return result
2461
def findall(self, bs, start=None, end=None, count=None, bytealigned=None):
    """Find all occurrences of bs. Return generator of bit positions.

    bs -- The bitstring to find.
    start -- The bit position to start the search. Defaults to 0.
    end -- The bit position one past the last bit to search.
           Defaults to self.len.
    count -- The maximum number of occurrences to find.
    bytealigned -- If True the bitstring will only be found on
                   byte boundaries.

    Raises ValueError if bs is empty, if count < 0, if start < 0,
    if end > self.len or if end < start. As this is a generator, the
    checks run when the first item is requested.

    Note that all occurrences of bs are found, even if they overlap.

    """
    if count is not None and count < 0:
        raise ValueError("In findall, count must be >= 0.")
    bs = Bits(bs)
    # Reject an empty target, as documented and as find/rfind already do.
    if not bs.len:
        raise ValueError("Cannot find an empty bitstring.")
    start, end = self._validate_slice(start, end)
    if bytealigned is None:
        # Fall back to the module-level default.
        bytealigned = globals()['bytealigned']
    c = 0
    if bytealigned and not bs.len % 8 and not self._datastore.offset:
        # Use the quick find method
        f = self._findbytes
        x = bs._getbytes()
    else:
        f = self._findregex
        x = re.compile(bs._getbin())
    while True:
        p = f(x, start, end, bytealigned)
        if not p:
            break
        if count is not None and c >= count:
            return
        c += 1
        # If called from a class that has a pos, set it
        try:
            self._pos = p[0]
        except AttributeError:
            pass
        yield p[0]
        # Restart just past this match's first bit (or byte) so overlaps are found.
        if bytealigned:
            start = p[0] + 8
        else:
            start = p[0] + 1
        if start >= end:
            break
    return
2513
def rfind(self, bs, start=None, end=None, bytealigned=None):
    """Locate the last occurrence of the bitstring bs.

    Returns a one-item tuple holding the bit position, or an empty tuple
    when bs is not found. The search is carried out with findall, which
    sets the bit position (pos property) on classes that have one.

    bs -- The bitstring to find.
    start -- The bit position to end the reverse search. Defaults to 0.
    end -- The bit position one past the first bit to reverse search.
           Defaults to self.len.
    bytealigned -- If True the bitstring will only be found on byte
                   boundaries.

    Raises ValueError if bs is empty, if start < 0, if end > self.len or
    if end < start.

    """
    bs = Bits(bs)
    start, end = self._validate_slice(start, end)
    if bytealigned is None:
        # Fall back to the module-level default.
        bytealigned = globals()['bytealigned']
    if not bs.len:
        raise ValueError("Cannot find an empty bitstring.")
    # Work backwards from the end in windows, taking the last match in the
    # first window that contains one.
    increment = max(8192, bs.len * 80)
    buffersize = min(increment + bs.len, end - start)
    pos = max(start, end - buffersize)
    while True:
        matches = list(self.findall(bs, start=pos, end=pos + buffersize,
                                    bytealigned=bytealigned))
        if matches:
            return (matches[-1],)
        if pos == start:
            return ()
        pos = max(start, pos - increment)
2552
def cut(self, bits, start=None, end=None, count=None):
    """Return bitstring generator by cutting into bits sized chunks.

    bits -- The size in bits of the bitstring chunks to generate.
            Must be greater than zero.
    start -- The bit position to start the first cut. Defaults to 0.
    end -- The bit position one past the last bit to use in the cut.
           Defaults to self.len.
    count -- If specified then at most count items are generated.
             Default is to cut as many times as possible.

    Only whole chunks are generated; a final partial chunk is dropped.

    Raises ValueError if count < 0 or bits <= 0.

    """
    start, end = self._validate_slice(start, end)
    if count is not None and count < 0:
        raise ValueError("Cannot cut - count must be >= 0.")
    if bits <= 0:
        # Zero is rejected as well, so the message says "> 0".
        raise ValueError("Cannot cut - bits must be > 0.")
    c = 0
    while count is None or c < count:
        c += 1
        nextchunk = self._slice(start, min(start + bits, end))
        if nextchunk.len != bits:
            # Ran out of bits before filling a whole chunk.
            return
        assert nextchunk._assertsanity()
        yield nextchunk
        start += bits
    return
2579
def split(self, delimiter, start=None, end=None, count=None,
          bytealigned=None):
    """Return bitstring generator by splitting using a delimiter.

    The first item generated is the section before the first delimiter,
    which may be an empty bitstring. Each later item starts at a delimiter.

    delimiter -- The bitstring used as the divider.
    start -- The bit position to start the split. Defaults to 0.
    end -- The bit position one past the last bit to use in the split.
           Defaults to self.len.
    count -- If specified then at most count items are generated.
             Default is to split as many times as possible.
    bytealigned -- If True splits will only occur on byte boundaries.

    Raises ValueError if the delimiter is empty or if count < 0.

    """
    delimiter = Bits(delimiter)
    if not delimiter.len:
        raise ValueError("split delimiter cannot be empty.")
    start, end = self._validate_slice(start, end)
    if bytealigned is None:
        # Fall back to the module-level default.
        bytealigned = globals()['bytealigned']
    if count is not None and count < 0:
        raise ValueError("Cannot split - count must be >= 0.")
    if count == 0:
        return
    if bytealigned and not delimiter.len % 8 and not self._datastore.offset:
        # Use the quick find method
        search = self._findbytes
        target = delimiter._getbytes()
    else:
        search = self._findregex
        target = re.compile(delimiter._getbin())
    hit = search(target, start, end, bytealigned)
    if not hit:
        # No delimiter at all: the whole searched range is the only item.
        yield self._slice(start, end)
        return
    # The section before the first delimiter is generated even when empty.
    yield self._slice(start, hit[0])
    section_start = cursor = hit[0]
    produced = 1
    while count is None or produced < count:
        # Look for the next delimiter beyond the current one.
        cursor += delimiter.len
        hit = search(target, cursor, end, bytealigned)
        if not hit:
            # No more occurrences, so return the rest of the bitstring
            yield self._slice(section_start, end)
            return
        produced += 1
        yield self._slice(section_start, hit[0])
        section_start = cursor = hit[0]
    # Have generated count bitstrings, so time to quit.
    return
2636
def join(self, sequence):
    """Return a new bitstring made of the items of sequence with self between them.

    sequence -- A sequence of bitstrings.

    """
    joined = self.__class__()
    items = iter(sequence)
    try:
        # The first item goes in without a separator in front of it.
        joined._append(Bits(next(items)))
        for item in items:
            joined._append(self)
            joined._append(Bits(item))
    except StopIteration:
        pass
    return joined
2654
def tobytes(self):
    """Return the bitstring as a bytes object, zero padded to a whole number of bytes.

    Up to seven zero bits will be added at the end to byte align.

    """
    raw = offsetcopy(self._datastore, 0).rawbytes
    # Need to ensure that unused bits at end are set to zero
    used_in_last = self.len % 8
    if used_in_last:
        raw[-1] &= (0xff << (8 - used_in_last))
    return bytes(raw)
2667
def tofile(self, f):
    """Write the bitstring to the file object f, zero padded to a whole number of bytes.

    Up to seven zero bits will be added at the end to byte align.

    """
    # If the bitstring is file based then we don't want to read it all
    # in to memory.
    chunksize = 1024 * 1024  # 1 MB chunks
    if not self._offset:
        store = self._datastore
        last_byte = store.bytelength - 1
        cursor = 0
        # Everything except the final byte is copied straight from the datastore.
        chunk = store.getbyteslice(cursor, min(cursor + chunksize, last_byte))
        while len(chunk) == chunksize:
            f.write(chunk)
            cursor += chunksize
            chunk = store.getbyteslice(cursor, min(cursor + chunksize, last_byte))
        f.write(chunk)
        # Now the final byte, ensuring that unused bits at end are set to 0.
        bits_in_final_byte = self.len % 8 or 8
        f.write(self[-bits_in_final_byte:].tobytes())
    else:
        # Really quite inefficient...
        chunkbits = chunksize * 8
        a = 0
        b = chunkbits
        while b <= self.len:
            f.write(self._slice(a, b)._getbytes())
            a += chunkbits
            b += chunkbits
        if a != self.len:
            f.write(self._slice(a, self.len).tobytes())
2701
def startswith(self, prefix, start=None, end=None):
    """Return whether the bits from start onwards begin with prefix.

    prefix -- The bitstring to search for.
    start -- The bit position to start from. Defaults to 0.
    end -- The bit position to end at. Defaults to self.len.

    """
    wanted = Bits(prefix)
    start, end = self._validate_slice(start, end)
    stop = start + wanted.len
    if stop > end:
        # The range is too short to hold the prefix.
        return False
    return self._slice(start, stop) == wanted
2716
def endswith(self, suffix, start=None, end=None):
    """Return whether the bits up to end finish with suffix.

    suffix -- The bitstring to search for.
    start -- The bit position to start from. Defaults to 0.
    end -- The bit position to end at. Defaults to self.len.

    """
    wanted = Bits(suffix)
    start, end = self._validate_slice(start, end)
    begin = end - wanted.len
    if begin < start:
        # The range is too short to hold the suffix.
        return False
    return self._slice(begin, end) == wanted
2731
def all(self, value, pos=None):
    """Return True if every selected bit equals value.

    value -- If value is True then checks for bits set to 1, otherwise
             checks for bits set to 0.
    pos -- An iterable of bit positions. Negative numbers are treated in
           the same way as slice indices. Defaults to the whole bitstring.

    Raises IndexError if a position is out of range.

    """
    wanted = bool(value)
    total = self.len
    positions = xrange(self.len) if pos is None else pos
    for p in positions:
        if p < 0:
            p += total
        if p < 0 or p >= total:
            raise IndexError("Bit position {0} out of range.".format(p))
        # The first bit that differs settles the answer.
        if self._datastore.getbit(p) is not wanted:
            return False
    return True
2753
def any(self, value, pos=None):
    """Return True if at least one selected bit equals value.

    value -- If value is True then checks for bits set to 1, otherwise
             checks for bits set to 0.
    pos -- An iterable of bit positions. Negative numbers are treated in
           the same way as slice indices. Defaults to the whole bitstring.

    Raises IndexError if a position is out of range.

    """
    wanted = bool(value)
    total = self.len
    positions = xrange(self.len) if pos is None else pos
    for p in positions:
        if p < 0:
            p += total
        if p < 0 or p >= total:
            raise IndexError("Bit position {0} out of range.".format(p))
        # The first bit that matches settles the answer.
        if self._datastore.getbit(p) is wanted:
            return True
    return False
2775
def count(self, value):
    """Return the number of bits that are set to one, or to zero.

    value -- If True then bits set to 1 are counted, otherwise bits set
             to 0 are counted.

    >>> Bits('0xef').count(1)
    7

    """
    if not self.len:
        return 0
    store = self._datastore
    last = store.bytelength - 1
    # Tally the ones in every byte except the last, using the lookup table.
    ones = sum(BIT_COUNT[store.getbyte(i)] for i in xrange(last))
    # adjust for bits at start that aren't part of the bitstring
    if self._offset:
        ones -= BIT_COUNT[store.getbyte(0) >> (8 - self._offset)]
    # and count the last 1 - 8 bits at the end.
    endbits = store.bytelength * 8 - (self._offset + self.len)
    ones += BIT_COUNT[store.getbyte(last) >> endbits]
    # The zeros are whatever is left once the ones are taken out.
    return ones if value else self.len - ones
2798
# The native-endian ("ne") accessors are plain aliases, chosen once from byteorder:
# the big-endian functions unless the byte order is 'little'.
if byteorder != 'little':
    _setfloatne = _setfloat
    _readfloatne = _readfloat
    _getfloatne = _getfloat
    _setuintne = _setuintbe
    _readuintne = _readuintbe
    _getuintne = _getuintbe
    _setintne = _setintbe
    _readintne = _readintbe
    _getintne = _getintbe
else:
    _setfloatne = _setfloatle
    _readfloatne = _readfloatle
    _getfloatne = _getfloatle
    _setuintne = _setuintle
    _readuintne = _readuintle
    _getuintne = _getuintle
    _setintne = _setintle
    _readintne = _readintle
    _getintne = _getintle
2820
# Read-only views of the bitstring. Each property simply exposes one of the
# private getter methods under a public attribute name.
_offset = property(fget=_getoffset)

len = property(
    fget=_getlength,
    doc="""The length of the bitstring in bits. Read only.
    """)
length = property(
    fget=_getlength,
    doc="""The length of the bitstring in bits. Read only.
    """)
bool = property(
    fget=_getbool,
    doc="""The bitstring as a bool (True or False). Read only.
    """)
hex = property(
    fget=_gethex,
    doc="""The bitstring as a hexadecimal string. Read only.
    """)
bin = property(
    fget=_getbin,
    doc="""The bitstring as a binary string. Read only.
    """)
oct = property(
    fget=_getoct,
    doc="""The bitstring as an octal string. Read only.
    """)
bytes = property(
    fget=_getbytes,
    doc="""The bitstring as a bytes object. Read only.
    """)
int = property(
    fget=_getint,
    doc="""The bitstring as a two's complement signed int. Read only.
    """)
uint = property(
    fget=_getuint,
    doc="""The bitstring as a two's complement unsigned int. Read only.
    """)
float = property(
    fget=_getfloat,
    doc="""The bitstring as a floating point number. Read only.
    """)
intbe = property(
    fget=_getintbe,
    doc="""The bitstring as a two's complement big-endian signed int. Read only.
    """)
uintbe = property(
    fget=_getuintbe,
    doc="""The bitstring as a two's complement big-endian unsigned int. Read only.
    """)
# Big-endian float shares the getter used by the plain 'float' property.
floatbe = property(
    fget=_getfloat,
    doc="""The bitstring as a big-endian floating point number. Read only.
    """)
intle = property(
    fget=_getintle,
    doc="""The bitstring as a two's complement little-endian signed int. Read only.
    """)
uintle = property(
    fget=_getuintle,
    doc="""The bitstring as a two's complement little-endian unsigned int. Read only.
    """)
floatle = property(
    fget=_getfloatle,
    doc="""The bitstring as a little-endian floating point number. Read only.
    """)
intne = property(
    fget=_getintne,
    doc="""The bitstring as a two's complement native-endian signed int. Read only.
    """)
uintne = property(
    fget=_getuintne,
    doc="""The bitstring as a two's complement native-endian unsigned int. Read only.
    """)
floatne = property(
    fget=_getfloatne,
    doc="""The bitstring as a native-endian floating point number. Read only.
    """)
ue = property(
    fget=_getue,
    doc="""The bitstring as an unsigned exponential-Golomb code. Read only.
    """)
se = property(
    fget=_getse,
    doc="""The bitstring as a signed exponential-Golomb code. Read only.
    """)
uie = property(
    fget=_getuie,
    doc="""The bitstring as an unsigned interleaved exponential-Golomb code. Read only.
    """)
sie = property(
    fget=_getsie,
    doc="""The bitstring as a signed interleaved exponential-Golomb code. Read only.
    """)
2892
2893
# Maps each token name to the Bits method that reads a value of that kind.
name_to_read = dict(
    uint=Bits._readuint,
    uintle=Bits._readuintle,
    uintbe=Bits._readuintbe,
    uintne=Bits._readuintne,
    int=Bits._readint,
    intle=Bits._readintle,
    intbe=Bits._readintbe,
    intne=Bits._readintne,
    float=Bits._readfloat,
    floatbe=Bits._readfloat,  # floatbe is a synonym for float
    floatle=Bits._readfloatle,
    floatne=Bits._readfloatne,
    hex=Bits._readhex,
    oct=Bits._readoct,
    bin=Bits._readbin,
    bits=Bits._readbits,
    bytes=Bits._readbytes,
    ue=Bits._readue,
    se=Bits._readse,
    uie=Bits._readuie,
    sie=Bits._readsie,
    bool=Bits._readbool,
)

# The tables below map initialiser keywords to the Bits method that performs
# the initialisation, grouped by which of 'length' and 'offset' they use.
init_with_length_and_offset = dict(
    bytes=Bits._setbytes_safe,
    filename=Bits._setfile,
)

init_with_length_only = dict(
    uint=Bits._setuint,
    int=Bits._setint,
    float=Bits._setfloat,
    uintbe=Bits._setuintbe,
    intbe=Bits._setintbe,
    floatbe=Bits._setfloat,
    uintle=Bits._setuintle,
    intle=Bits._setintle,
    floatle=Bits._setfloatle,
    uintne=Bits._setuintne,
    intne=Bits._setintne,
    floatne=Bits._setfloatne,
)

init_without_length_or_offset = dict(
    bin=Bits._setbin_safe,
    hex=Bits._sethex,
    oct=Bits._setoct,
    ue=Bits._setue,
    se=Bits._setse,
    uie=Bits._setuie,
    sie=Bits._setsie,
    bool=Bits._setbool,
)
2947
2948
class BitArray(Bits):
    """A container holding a mutable sequence of bits.

    Subclass of the immutable Bits class. Inherits all of its
    methods (except __hash__) and adds mutating methods.

    Mutating methods:

    append() -- Append a bitstring.
    byteswap() -- Change byte endianness in-place.
    clear() -- Remove all bits, reset to zero length.
    insert() -- Insert a bitstring.
    invert() -- Flip bit(s) between one and zero.
    overwrite() -- Overwrite a section with a new bitstring.
    prepend() -- Prepend a bitstring.
    replace() -- Replace occurrences of one bitstring with another.
    reverse() -- Reverse bits in-place.
    rol() -- Rotate bits to the left.
    ror() -- Rotate bits to the right.
    set() -- Set bit(s) to 1 or 0.

    Other methods:

    copy() -- Return a copy of the bitstring.

    Methods inherited from Bits:

    all() -- Check if all specified bits are set to 1 or 0.
    any() -- Check if any of specified bits are set to 1 or 0.
    count() -- Count the number of bits set to 1 or 0.
    cut() -- Create generator of constant sized chunks.
    endswith() -- Return whether the bitstring ends with a sub-string.
    find() -- Find a sub-bitstring in the current bitstring.
    findall() -- Find all occurrences of a sub-bitstring in the current bitstring.
    join() -- Join bitstrings together using current bitstring.
    rfind() -- Seek backwards to find a sub-bitstring.
    split() -- Create generator of chunks split by a delimiter.
    startswith() -- Return whether the bitstring starts with a sub-bitstring.
    tobytes() -- Return bitstring as bytes, padding if needed.
    tofile() -- Write bitstring to file, padding if needed.
    unpack() -- Interpret bits using format string.

    Special methods:

    Mutating operators are available: [], <<=, >>=, +=, *=, &=, |= and ^=
    in addition to the inherited [], ==, !=, +, *, ~, <<, >>, &, | and ^.

    Properties:

    bin -- The bitstring as a binary string.
    bool -- For single bit bitstrings, interpret as True or False.
    bytes -- The bitstring as a bytes object.
    float -- Interpret as a floating point number.
    floatbe -- Interpret as a big-endian floating point number.
    floatle -- Interpret as a little-endian floating point number.
    floatne -- Interpret as a native-endian floating point number.
    hex -- The bitstring as a hexadecimal string.
    int -- Interpret as a two's complement signed integer.
    intbe -- Interpret as a big-endian signed integer.
    intle -- Interpret as a little-endian signed integer.
    intne -- Interpret as a native-endian signed integer.
    len -- Length of the bitstring in bits.
    oct -- The bitstring as an octal string.
    se -- Interpret as a signed exponential-Golomb code.
    ue -- Interpret as an unsigned exponential-Golomb code.
    sie -- Interpret as a signed interleaved exponential-Golomb code.
    uie -- Interpret as an unsigned interleaved exponential-Golomb code.
    uint -- Interpret as a two's complement unsigned integer.
    uintbe -- Interpret as a big-endian unsigned integer.
    uintle -- Interpret as a little-endian unsigned integer.
    uintne -- Interpret as a native-endian unsigned integer.

    """

    # No per-instance attributes are added by this class.
    __slots__ = ()

    # As BitArray objects are mutable, we shouldn't allow them to be hashed.
    __hash__ = None
3024
def __init__(self, auto=None, length=None, offset=None, **kwargs):
    """Finish initialising a mutable bitstring.

    Either specify an 'auto' initialiser:
    auto -- a string of comma separated tokens, an integer, a file object,
            a bytearray, a boolean iterable or another bitstring.

    Or initialise via **kwargs with one (and only one) of:
    bytes -- raw data as a string, for example read from a binary file.
    bin -- binary string representation, e.g. '0b001010'.
    hex -- hexadecimal string representation, e.g. '0x2ef'
    oct -- octal string representation, e.g. '0o777'.
    uint -- an unsigned integer.
    int -- a signed integer.
    float -- a floating point number.
    uintbe -- an unsigned big-endian whole byte integer.
    intbe -- a signed big-endian whole byte integer.
    floatbe -- a big-endian floating point number.
    uintle -- an unsigned little-endian whole byte integer.
    intle -- a signed little-endian whole byte integer.
    floatle -- a little-endian floating point number.
    uintne -- an unsigned native-endian whole byte integer.
    intne -- a signed native-endian whole byte integer.
    floatne -- a native-endian floating point number.
    se -- a signed exponential-Golomb code.
    ue -- an unsigned exponential-Golomb code.
    sie -- a signed interleaved exponential-Golomb code.
    uie -- an unsigned interleaved exponential-Golomb code.
    bool -- a boolean (True or False).
    filename -- a file which will be opened in binary read-only mode.

    Other keyword arguments:
    length -- length of the bitstring in bits, if needed and appropriate.
              It must be supplied for all integer and float initialisers.
    offset -- bit offset to the data. These offset bits are
              ignored and this is intended for use when
              initialising using 'bytes' or 'filename'.

    """
    # The arguments are not used here; this method only makes sure that a
    # mutable BitArray holds its data in memory (files are always read in).
    if isinstance(self._datastore, ByteStore):
        return
    self._ensureinmemory()
3065
def __new__(cls, auto=None, length=None, offset=None, **kwargs):
    """Create the instance and hand it the data store built by Bits.__new__."""
    instance = super(BitArray, cls).__new__(cls)
    # Let Bits interpret the initialiser arguments, then adopt its data store.
    initialised = Bits.__new__(BitArray, auto, length, offset, **kwargs)
    instance._datastore = initialised._datastore
    return instance
3071
def __iadd__(self, bs):
    """Implement 'self += bs' by appending bs in place. Return self.

    bs -- the bitstring to append.

    """
    # Delegates to append() so the conversion of bs happens in one place.
    self.append(bs)
    return self
3080
def __copy__(self):
    """Return a new copy of the BitArray."""
    duplicate = BitArray()
    if isinstance(self._datastore, ByteStore):
        # In-memory data is copied so the two objects do not share it.
        duplicate._datastore = copy.copy(self._datastore)
    else:
        # Let them both point to the same (invariant) array.
        # If either gets modified then at that point they'll be read into memory.
        duplicate._datastore = self._datastore
    return duplicate
3091
def __setitem__(self, key, value):
    """Set a single bit or a slice of bits to a new value.

    key -- An integer bit index or a slice. Negative indices count from
           the end. A slice with a step other than 1 is applied with
           list extended-slice semantics to the individual bits.
    value -- For a single bit: an integer (0, 1 or -1) or anything the
             Bits constructor accepts. For a slice: an integer, which
             is written as a uint (or int if negative) exactly filling
             the slice, or anything the Bits constructor accepts.

    If the length of the bitstring is changed then pos will be moved
    to after the inserted section, otherwise it will remain unchanged.

    Raises IndexError if a single bit index is out of range.
    Raises ValueError if a single bit is set with an integer other than
    0, 1 or -1.
    Raises TypeError if the value for a slice can't be converted.

    >>> s = BitArray('0xff')
    >>> s[0:4] = '0xe'
    >>> print s
    '0xef'
    >>> s[4:4] = '0x00'
    >>> print s
    '0xe00f'

    """
    try:
        # Only slice objects have a 'step' attribute.
        step = key.step if key.step is not None else 1
    except AttributeError:
        # single element
        if key < 0:
            key += self.len
        if not 0 <= key < self.len:
            raise IndexError("Slice index out of range.")
        if isinstance(value, numbers.Integral):
            if not value:
                self._unset(key)
                return
            if value in (1, -1):
                self._set(key)
                return
            raise ValueError("Cannot set a single bit with integer {0}.".format(value))
        value = Bits(value)
        if value.len == 1:
            # TODO: this can't be optimal
            if value[0]:
                self._set(key)
            else:
                self._unset(key)
        else:
            # A longer value replaces the single bit, changing the length.
            self._delete(1, key)
            self._insert(value, key)
        return

    if step != 1:
        # convert to binary string and use string slicing
        # TODO: Horribly inefficient
        temp = list(self._getbin())
        temp[key] = list(Bits(value)._getbin())
        self._setbin_unsafe(''.join(temp))
        return

    # From here on the step is always 1.
    # If value is an integer then we want to set the slice to that
    # value rather than initialise a new bitstring of that length.
    if not isinstance(value, numbers.Integral):
        try:
            # TODO: Better way than calling constructor here?
            value = Bits(value)
        except TypeError:
            raise TypeError("Bitstring, integer or string expected. "
                            "Got {0}.".format(type(value)))
    start = 0
    if key.start is not None:
        start = key.start
        if key.start < 0:
            start += self.len
            if start < 0:
                start = 0
    stop = self.len
    if key.stop is not None:
        stop = key.stop
        if key.stop < 0:
            stop += self.len
    if start > stop:
        # The standard behaviour for lists is to just insert at the
        # start position if stop < start and step == 1.
        stop = start
    if isinstance(value, numbers.Integral):
        # The integer must exactly fill the (unclamped) slice.
        if value >= 0:
            value = self.__class__(uint=value, length=stop - start)
        else:
            value = self.__class__(int=value, length=stop - start)
    stop = min(stop, self.len)
    start = max(start, 0)
    start = min(start, stop)
    if (stop - start) == value.len:
        # Same size: overwrite in place without changing the length.
        if not value.len:
            return
        self._overwrite(value, start)
    else:
        # TODO: A delete then insert is wasteful - it could do unneeded shifts.
        # Could be either overwrite + insert or overwrite + delete.
        self._delete(stop - start, start)
        self._insert(value, start)
        # pos is now after the inserted piece.
    return
3199
def __delitem__(self, key):
    """Delete a single bit or a slice of bits.

    key -- An integer bit index or a slice. Negative indices count from
           the end, and slice bounds are clamped to the bitstring in the
           same way as for lists. A slice with a step other than 1 is
           applied with list extended-slice semantics to the individual
           bits.

    Raises IndexError if a single bit index is out of range.

    >>> a = BitArray('0x001122')
    >>> del a[8:16]
    >>> print a
    0x0022

    """
    try:
        # Only slice objects have a 'step' attribute.
        step = key.step if key.step is not None else 1
    except AttributeError:
        # single element
        if key < 0:
            key += self.len
        if not 0 <= key < self.len:
            raise IndexError("Slice index out of range.")
        self._delete(1, key)
        return

    if step != 1:
        # convert to binary string and use string slicing
        # TODO: Horribly inefficient
        temp = list(self._getbin())
        del temp[key]
        self._setbin_unsafe(''.join(temp))
        return

    # slice.indices() resolves None and negative bounds and clamps both ends
    # to the current length. Resolving them by hand previously clamped a
    # negative start to 0 whenever a stop was given (deleting too much) and
    # passed a negative stop through to _delete as a negative position.
    start, stop, _ = key.indices(self.len)
    if start > stop:
        return
    self._delete(stop - start, start)
    return
3248
def __ilshift__(self, n):
    """Implement 'self <<= n': shift the bits left in place. Return self.

    n -- the number of bits to shift. Must be >= 0.

    Raises ValueError if n is negative or the bitstring is empty.

    """
    if n < 0:
        raise ValueError("Cannot shift by a negative amount.")
    if not self.len:
        raise ValueError("Cannot shift an empty bitstring.")
    if n:
        # Shifting by more than the length is the same as shifting by it.
        return self._ilshift(min(n, self.len))
    return self
3263
def __irshift__(self, n):
    """Implement 'self >>= n': shift the bits right in place. Return self.

    n -- the number of bits to shift. Must be >= 0.

    Raises ValueError if n is negative or the bitstring is empty.

    """
    if n < 0:
        raise ValueError("Cannot shift by a negative amount.")
    if not self.len:
        raise ValueError("Cannot shift an empty bitstring.")
    if n:
        # Shifting by more than the length is the same as shifting by it.
        return self._irshift(min(n, self.len))
    return self
3278
def __imul__(self, n):
    """Implement 'self *= n': concatenate n copies of self in place.

    Called for expressions of the form 'a *= 3'. Returns whatever the
    in-place helper returns.
    n -- The number of concatenations. Must be >= 0.

    Raises ValueError if n is negative.

    """
    if not n < 0:
        return self._imul(n)
    raise ValueError("Cannot multiply by a negative integer.")
3289
def __ior__(self, bs):
    """Implement 'self |= bs': bit-wise OR with bs in place.

    bs -- anything accepted by the Bits constructor; it must have the
          same length as self.

    Raises ValueError if the lengths differ.

    """
    other = Bits(bs)
    if self.len != other.len:
        raise ValueError("Bitstrings must have the same length for |= operator.")
    return self._ior(other)
3296
def __iand__(self, bs):
    """Implement 'self &= bs': bit-wise AND with bs in place.

    bs -- anything accepted by the Bits constructor; it must have the
          same length as self.

    Raises ValueError if the lengths differ.

    """
    other = Bits(bs)
    if self.len != other.len:
        raise ValueError("Bitstrings must have the same length for &= operator.")
    return self._iand(other)
3303
def __ixor__(self, bs):
    """Implement 'self ^= bs': bit-wise XOR with bs in place.

    bs -- anything accepted by the Bits constructor; it must have the
          same length as self.

    Raises ValueError if the lengths differ.

    """
    other = Bits(bs)
    if self.len != other.len:
        raise ValueError("Bitstrings must have the same length for ^= operator.")
    return self._ixor(other)
3310
def replace(self, old, new, start=None, end=None, count=None,
            bytealigned=None):
    """Replace all occurrences of old with new in place.

    Returns number of replacements made.

    old -- The bitstring to replace.
    new -- The replacement bitstring.
    start -- Any occurrences that start before this will not be replaced.
             Defaults to 0.
    end -- Any occurrences that finish after this will not be replaced.
           Defaults to self.len.
    count -- The maximum number of replacements to make. Defaults to
             replace all occurrences.
    bytealigned -- If True replacements will only be made on byte
                   boundaries. Defaults to the module-level
                   'bytealigned' setting.

    Raises ValueError if old is empty or if start or end are
    out of range.

    """
    old = Bits(old)
    new = Bits(new)
    if not old.len:
        raise ValueError("Empty bitstring cannot be replaced.")
    start, end = self._validate_slice(start, end)
    if bytealigned is None:
        bytealigned = globals()['bytealigned']
    # Adjust count for use in split()
    if count is not None:
        count += 1
    sections = self.split(old, start, end, count, bytealigned)
    lengths = [s.len for s in sections]
    if len(lengths) == 1:
        # Didn't find anything to replace.
        return 0  # no replacements done
    if new is self:
        # Prevent self assignment woes
        new = copy.copy(self)
    positions = [lengths[0] + start]
    for k in lengths[1:-1]:
        # Next position is the previous one plus the length of the next section.
        positions.append(positions[-1] + k)
    # We have all the positions that need replacements. We do them
    # in reverse order so that they won't move around as we replace.
    positions.reverse()

    # Only stream types carry a bit position. Probe for it on its own so
    # that an AttributeError raised while replacing can never be mistaken
    # for "no position" and cause the replacements to be applied twice.
    try:
        newpos = self._pos
        has_pos = True
    except AttributeError:
        newpos = None
        has_pos = False

    for p in positions:
        self[p:p + old.len] = new

    if has_pos:
        # Need to calculate new pos, if this is a bitstream
        if old.len != new.len:
            diff = new.len - old.len
            for p in positions:
                if p >= newpos:
                    # Replacement was at or after pos: pos is unaffected.
                    continue
                if p + old.len <= newpos:
                    # Replacement was wholly before pos: shift pos along.
                    newpos += diff
                else:
                    # pos was inside the replaced section: move to its start.
                    newpos = p
        self._pos = newpos
    assert self._assertsanity()
    return len(lengths) - 1
3377
def insert(self, bs, pos=None):
    """Insert bs at bit position pos.

    bs -- The bitstring to insert.
    pos -- The bit position to insert at. A negative value counts from
           the end. If omitted the current bit position is used, which
           only stream types have.

    Raises ValueError if pos is outside 0 <= pos <= self.len after
    negative values have been resolved.
    Raises TypeError if pos is omitted and this type has no bit position.

    """
    bs = Bits(bs)
    if not bs.len:
        # NOTE(review): this early exit returns self whereas a successful
        # insert returns None; kept as-is for backward compatibility.
        return self
    if bs is self:
        # Inserting a bitstring into itself: work from a copy.
        bs = self.__copy__()
    if pos is None:
        try:
            pos = self._pos
        except AttributeError:
            raise TypeError("insert require a bit position for this type.")
    if pos < 0:
        pos += self.len
    if not 0 <= pos <= self.len:
        raise ValueError("Invalid insert position.")
    self._insert(bs, pos)
3402
def overwrite(self, bs, pos=None):
    """Overwrite with bs at bit position pos.

    bs -- The bitstring to overwrite with.
    pos -- The bit position to begin overwriting from. A negative value
           counts from the end. If omitted the current bit position is
           used, which only stream types have.

    Raises ValueError if pos < 0 or pos + bs.len > self.len after
    negative values have been resolved.
    Raises TypeError if pos is omitted and this type has no bit position.

    """
    bs = Bits(bs)
    if not bs.len:
        return
    if pos is None:
        try:
            pos = self._pos
        except AttributeError:
            raise TypeError("overwrite require a bit position for this type.")
    if pos < 0:
        pos += self.len
    if pos < 0 or pos + bs.len > self.len:
        raise ValueError("Overwrite exceeds boundary of bitstring.")
    self._overwrite(bs, pos)
    try:
        # Leave the bit position just after the overwritten section.
        self._pos = pos + bs.len
    except AttributeError:
        # Types without a bit position can't store it; nothing to update.
        pass
3429
def append(self, bs):
    """Add a bitstring to the end of the current bitstring.

    bs -- The bitstring to append.

    """
    # The offset is a hint to make bs easily appendable.
    offset_hint = (self.len + self._offset) % 8
    converted = self._converttobitstring(bs, offset=offset_hint)
    self._append(converted)
3439
def prepend(self, bs):
    """Add a bitstring to the start of the current bitstring.

    bs -- The bitstring to prepend.

    """
    # Convert first so that any initialiser Bits accepts can be used.
    self._prepend(Bits(bs))
3448
def reverse(self, start=None, end=None):
    """Reverse the order of the bits in place.

    start -- Position of first bit to reverse. Defaults to 0.
    end -- One past the position of the last bit to reverse.
           Defaults to self.len.

    Using on an empty bitstring will have no effect.

    Raises ValueError if start < 0, end > self.len or end < start.

    """
    start, end = self._validate_slice(start, end)
    covers_everything = start == 0 and end == self.len
    if covers_everything:
        self._reverse()
    else:
        # Reverse a copy of the section, then write it back over the original.
        section = self._slice(start, end)
        section._reverse()
        self[start:end] = section
3468
def set(self, value, pos=None):
    """Set one or many bits to 1 or 0.

    value -- If True bits are set to 1, otherwise they are set to 0.
    pos -- Either a single bit position or an iterable of bit positions.
           Negative numbers are treated in the same way as slice indices.
           Defaults to the entire bitstring.

    Raises IndexError if pos < -self.len or pos >= self.len.

    """
    f = self._set if value else self._unset
    if pos is None:
        pos = xrange(self.len)
    length = self.len
    # Decide up front whether pos is a single position or many. Only the
    # iter() call is guarded: wrapping the whole loop would swallow any
    # TypeError raised while setting a bit and replace it with a
    # misleading comparison error from the single-position path.
    try:
        positions = iter(pos)
    except TypeError:
        # Single pos
        positions = (pos,)
    for p in positions:
        if p < 0:
            p += length
        if not 0 <= p < length:
            raise IndexError("Bit position {0} out of range.".format(p))
        f(p)
3498
def invert(self, pos=None):
    """Flip one or many bits from 0 to 1 or vice versa.

    pos -- Either a single bit position or an iterable of bit positions.
           Negative numbers are treated in the same way as slice indices.
           If omitted, every bit is inverted.

    Raises IndexError if pos < -self.len or pos >= self.len.

    """
    if pos is None:
        self._invert_all()
        return
    # Treat a lone position as a one-element collection.
    targets = pos if isinstance(pos, collections.abc.Iterable) else (pos,)
    length = self.len

    for target in targets:
        index = target + length if target < 0 else target
        if index < 0 or index >= length:
            raise IndexError("Bit position {0} out of range.".format(index))
        self._invert(index)
3521
def ror(self, bits, start=None, end=None):
    """Rotate bits to the right in-place.

    bits -- The number of bits to rotate by.
    start -- Start of slice to rotate. Defaults to 0.
    end -- End of slice to rotate. Defaults to self.len.

    Rotating a zero-width slice has no effect.

    Raises Error if the bitstring is empty.
    Raises ValueError if bits < 0.

    """
    if not self.len:
        raise Error("Cannot rotate an empty bitstring.")
    if bits < 0:
        raise ValueError("Cannot rotate right by negative amount.")
    start, end = self._validate_slice(start, end)
    width = end - start
    if not width:
        # Nothing to rotate; also avoids taking the modulo by zero below.
        return
    bits %= width
    if not bits:
        return
    # Move the last 'bits' bits of the slice to its front.
    rhs = self._slice(end - bits, end)
    self._delete(bits, end - bits)
    self._insert(rhs, start)
3543
def rol(self, bits, start=None, end=None):
    """Rotate bits to the left in-place.

    bits -- The number of bits to rotate by.
    start -- Start of slice to rotate. Defaults to 0.
    end -- End of slice to rotate. Defaults to self.len.

    Rotating a zero-width slice has no effect.

    Raises Error if the bitstring is empty.
    Raises ValueError if bits < 0.

    """
    if not self.len:
        raise Error("Cannot rotate an empty bitstring.")
    if bits < 0:
        raise ValueError("Cannot rotate left by negative amount.")
    start, end = self._validate_slice(start, end)
    width = end - start
    if not width:
        # Nothing to rotate; also avoids taking the modulo by zero below.
        return
    bits %= width
    if not bits:
        return
    # Move the first 'bits' bits of the slice to its end.
    lhs = self._slice(start, start + bits)
    self._delete(bits, start)
    self._insert(lhs, end - bits)
3565
def byteswap(self, fmt=None, start=None, end=None, repeat=True):
    """Change the endianness in-place. Return number of repeats of fmt done.

    fmt -- A compact structure string, an integer number of bytes or
           an iterable of integers. Defaults to None, which (like 0)
           byte reverses all the whole bytes in the range.
    start -- Start bit position, defaults to 0.
    end -- End bit position, defaults to self.len.
    repeat -- If True (the default) the byte swapping pattern is repeated
              as much as possible.

    Raises ValueError if a byte length is negative or not an integer, or
    if a format string can't be parsed.
    Raises TypeError if fmt is not an integer, string or iterable.

    """
    start, end = self._validate_slice(start, end)
    if fmt is None or fmt == 0:
        # reverse all of the whole bytes.
        bytesizes = [(end - start) // 8]
    elif isinstance(fmt, numbers.Integral):
        if fmt < 0:
            raise ValueError("Improper byte length {0}.".format(fmt))
        bytesizes = [fmt]
    elif isinstance(fmt, basestring):
        m = STRUCT_PACK_RE.match(fmt)
        if not m:
            raise ValueError("Cannot parse format string {0}.".format(fmt))
        # Split the format string into a list of 'q', '4h' etc.
        formatlist = re.findall(STRUCT_SPLIT_RE, m.group('fmt'))
        # Now deal with multiplicative factors, 4h -> hhhh etc.
        bytesizes = []
        for f in formatlist:
            if len(f) == 1:
                bytesizes.append(PACK_CODE_SIZE[f])
            else:
                bytesizes.extend([PACK_CODE_SIZE[f[-1]]] * int(f[:-1]))
    elif isinstance(fmt, collections.abc.Iterable):
        # Materialise the sizes: they are walked several times below, and a
        # one-shot iterator would be used up by the validation loop, making
        # the total size zero and the call a silent no-op.
        bytesizes = list(fmt)
        for bytesize in bytesizes:
            if not isinstance(bytesize, numbers.Integral) or bytesize < 0:
                raise ValueError("Improper byte length {0}.".format(bytesize))
    else:
        raise TypeError("Format must be an integer, string or iterable.")

    repeats = 0
    totalbitsize = 8 * sum(bytesizes)
    if not totalbitsize:
        return 0
    if repeat:
        # Try to repeat up to the end of the bitstring.
        finalbit = end
    else:
        # Just try one (set of) byteswap(s).
        finalbit = start + totalbitsize
    # Apply the pattern to each complete window that fits before finalbit.
    patternend = start + totalbitsize
    while patternend <= finalbit:
        bytestart = patternend - totalbitsize
        for bytesize in bytesizes:
            byteend = bytestart + bytesize * 8
            self._reversebytes(bytestart, byteend)
            bytestart += bytesize * 8
        repeats += 1
        patternend += totalbitsize
    return repeats
3625
def clear(self):
    """Remove all bits, reset to zero length."""
    # The work is done by the private helper of the same name.
    self._clear()
3629
def copy(self):
    """Return a copy of the bitstring."""
    # The work is done by the private helper of the same name.
    duplicate = self._copy()
    return duplicate
3633
# Read/write versions of the interpretation properties: each one pairs the
# getter inherited from Bits with the matching setter.
int = property(
    fget=Bits._getint, fset=Bits._setint,
    doc="""The bitstring as a two's complement signed int. Read and write.
    """)
uint = property(
    fget=Bits._getuint, fset=Bits._setuint,
    doc="""The bitstring as a two's complement unsigned int. Read and write.
    """)
float = property(
    fget=Bits._getfloat, fset=Bits._setfloat,
    doc="""The bitstring as a floating point number. Read and write.
    """)
intbe = property(
    fget=Bits._getintbe, fset=Bits._setintbe,
    doc="""The bitstring as a two's complement big-endian signed int. Read and write.
    """)
uintbe = property(
    fget=Bits._getuintbe, fset=Bits._setuintbe,
    doc="""The bitstring as a two's complement big-endian unsigned int. Read and write.
    """)
# Big-endian float shares the accessors used by the plain 'float' property.
floatbe = property(
    fget=Bits._getfloat, fset=Bits._setfloat,
    doc="""The bitstring as a big-endian floating point number. Read and write.
    """)
intle = property(
    fget=Bits._getintle, fset=Bits._setintle,
    doc="""The bitstring as a two's complement little-endian signed int. Read and write.
    """)
uintle = property(
    fget=Bits._getuintle, fset=Bits._setuintle,
    doc="""The bitstring as a two's complement little-endian unsigned int. Read and write.
    """)
floatle = property(
    fget=Bits._getfloatle, fset=Bits._setfloatle,
    doc="""The bitstring as a little-endian floating point number. Read and write.
    """)
intne = property(
    fget=Bits._getintne, fset=Bits._setintne,
    doc="""The bitstring as a two's complement native-endian signed int. Read and write.
    """)
uintne = property(
    fget=Bits._getuintne, fset=Bits._setuintne,
    doc="""The bitstring as a two's complement native-endian unsigned int. Read and write.
    """)
floatne = property(
    fget=Bits._getfloatne, fset=Bits._setfloatne,
    doc="""The bitstring as a native-endian floating point number. Read and write.
    """)
ue = property(
    fget=Bits._getue, fset=Bits._setue,
    doc="""The bitstring as an unsigned exponential-Golomb code. Read and write.
    """)
se = property(
    fget=Bits._getse, fset=Bits._setse,
    doc="""The bitstring as a signed exponential-Golomb code. Read and write.
    """)
uie = property(
    fget=Bits._getuie, fset=Bits._setuie,
    doc="""The bitstring as an unsigned interleaved exponential-Golomb code. Read and write.
    """)
sie = property(
    fget=Bits._getsie, fset=Bits._setsie,
    doc="""The bitstring as a signed interleaved exponential-Golomb code. Read and write.
    """)
hex = property(
    fget=Bits._gethex, fset=Bits._sethex,
    doc="""The bitstring as a hexadecimal string. Read and write.
    """)
bin = property(
    fget=Bits._getbin, fset=Bits._setbin_safe,
    doc="""The bitstring as a binary string. Read and write.
    """)
oct = property(
    fget=Bits._getoct, fset=Bits._setoct,
    doc="""The bitstring as an octal string. Read and write.
    """)
bool = property(
    fget=Bits._getbool, fset=Bits._setbool,
    doc="""The bitstring as a bool (True or False). Read and write.
    """)
bytes = property(
    fget=Bits._getbytes, fset=Bits._setbytes_safe,
    doc="""The bitstring as a ordinary string. Read and write.
    """)
3697
3698
3699class ConstBitStream(Bits):
3700 """A container or stream holding an immutable sequence of bits.
3701
3702 For a mutable container use the BitStream class instead.
3703
3704 Methods inherited from Bits:
3705
3706 all() -- Check if all specified bits are set to 1 or 0.
3707 any() -- Check if any of specified bits are set to 1 or 0.
3708 count() -- Count the number of bits set to 1 or 0.
3709 cut() -- Create generator of constant sized chunks.
3710 endswith() -- Return whether the bitstring ends with a sub-string.
3711 find() -- Find a sub-bitstring in the current bitstring.
3712 findall() -- Find all occurrences of a sub-bitstring in the current bitstring.
3713 join() -- Join bitstrings together using current bitstring.
3714 rfind() -- Seek backwards to find a sub-bitstring.
3715 split() -- Create generator of chunks split by a delimiter.
3716 startswith() -- Return whether the bitstring starts with a sub-bitstring.
3717 tobytes() -- Return bitstring as bytes, padding if needed.
3718 tofile() -- Write bitstring to file, padding if needed.
3719 unpack() -- Interpret bits using format string.
3720
3721 Other methods:
3722
3723 bytealign() -- Align to next byte boundary.
3724 peek() -- Peek at and interpret next bits as a single item.
3725 peeklist() -- Peek at and interpret next bits as a list of items.
3726 read() -- Read and interpret next bits as a single item.
3727 readlist() -- Read and interpret next bits as a list of items.
3728
3729 Special methods:
3730
3731 Also available are the operators [], ==, !=, +, *, ~, <<, >>, &, |, ^.
3732
3733 Properties:
3734
3735 bin -- The bitstring as a binary string.
3736 bool -- For single bit bitstrings, interpret as True or False.
3737 bytepos -- The current byte position in the bitstring.
3738 bytes -- The bitstring as a bytes object.
3739 float -- Interpret as a floating point number.
3740 floatbe -- Interpret as a big-endian floating point number.
3741 floatle -- Interpret as a little-endian floating point number.
3742 floatne -- Interpret as a native-endian floating point number.
3743 hex -- The bitstring as a hexadecimal string.
3744 int -- Interpret as a two's complement signed integer.
3745 intbe -- Interpret as a big-endian signed integer.
3746 intle -- Interpret as a little-endian signed integer.
3747 intne -- Interpret as a native-endian signed integer.
3748 len -- Length of the bitstring in bits.
3749 oct -- The bitstring as an octal string.
3750 pos -- The current bit position in the bitstring.
3751 se -- Interpret as a signed exponential-Golomb code.
3752 ue -- Interpret as an unsigned exponential-Golomb code.
3753 sie -- Interpret as a signed interleaved exponential-Golomb code.
3754 uie -- Interpret as an unsigned interleaved exponential-Golomb code.
3755 uint -- Interpret as a two's complement unsigned integer.
3756 uintbe -- Interpret as a big-endian unsigned integer.
3757 uintle -- Interpret as a little-endian unsigned integer.
3758 uintne -- Interpret as a native-endian unsigned integer.
3759
3760 """
3761
3762 __slots__ = ('_pos')
3763
3764 def __init__(self, auto=None, length=None, offset=None, **kwargs):
3765 """Either specify an 'auto' initialiser:
3766 auto -- a string of comma separated tokens, an integer, a file object,
3767 a bytearray, a boolean iterable or another bitstring.
3768
3769 Or initialise via **kwargs with one (and only one) of:
3770 bytes -- raw data as a string, for example read from a binary file.
3771 bin -- binary string representation, e.g. '0b001010'.
3772 hex -- hexadecimal string representation, e.g. '0x2ef'
3773 oct -- octal string representation, e.g. '0o777'.
3774 uint -- an unsigned integer.
3775 int -- a signed integer.
3776 float -- a floating point number.
3777 uintbe -- an unsigned big-endian whole byte integer.
3778 intbe -- a signed big-endian whole byte integer.
3779 floatbe - a big-endian floating point number.
3780 uintle -- an unsigned little-endian whole byte integer.
3781 intle -- a signed little-endian whole byte integer.
3782 floatle -- a little-endian floating point number.
3783 uintne -- an unsigned native-endian whole byte integer.
3784 intne -- a signed native-endian whole byte integer.
3785 floatne -- a native-endian floating point number.
3786 se -- a signed exponential-Golomb code.
3787 ue -- an unsigned exponential-Golomb code.
3788 sie -- a signed interleaved exponential-Golomb code.
3789 uie -- an unsigned interleaved exponential-Golomb code.
3790 bool -- a boolean (True or False).
3791 filename -- a file which will be opened in binary read-only mode.
3792
3793 Other keyword arguments:
3794 length -- length of the bitstring in bits, if needed and appropriate.
3795 It must be supplied for all integer and float initialisers.
3796 offset -- bit offset to the data. These offset bits are
3797 ignored and this is intended for use when
3798 initialising using 'bytes' or 'filename'.
3799
3800 """
3801 self._pos_pos = 0
3802
def __new__(cls, auto=None, length=None, offset=None, **kwargs):
    """Allocate the instance and pass every argument on to _initialise()."""
    instance = super(ConstBitStream, cls).__new__(cls)
    instance._initialise(auto, length, offset, **kwargs)
    return instance
3807
3808 def _setbytepos(self, bytepos):
3809 """Move to absolute byte-aligned position in stream."""
3810 self._setbitpos(bytepos * 8)
3811
3812 def _getbytepos(self):
3813 """Return the current position in the stream in bytes. Must be byte aligned."""
3814 if self._pos_pos % 8:
3815 raise ByteAlignError("Not byte aligned in _getbytepos().")
3816 return self._pos_pos // 8
3817
3818 def _setbitpos(self, pos):
3819 """Move to absolute postion bit in bitstream."""
3820 if pos < 0:
3821 raise ValueError("Bit position cannot be negative.")
3822 if pos > self.len:
3823 raise ValueError("Cannot seek past the end of the data.")
3824 self._pos_pos = pos
3825
3826 def _getbitpos(self):
3827 """Return the current position in the stream in bits."""
3828 return self._pos_pos
3829
def _clear(self):
    """Clear the bitstring via Bits._clear() and rewind to bit position 0."""
    Bits._clear(self)
    self._pos = 0
3833
def __copy__(self):
    """Return a new ConstBitStream sharing this one's data (copy module hook).

    The copy starts at bit position 0 whatever the position of the original.

    """
    # For a genuinely separate object (different ID) use _copy instead; here
    # the datastore can simply be shared because it is immutable.
    duplicate = ConstBitStream()
    duplicate._datastore = self._datastore
    # The bit position is reset rather than copied.
    duplicate._pos = 0
    return duplicate
3843
def __add__(self, bs):
    """Return the concatenation of this bitstring and bs as a new bitstring.

    bs -- the bitstring to append.

    The result of Bits.__add__ is returned with its position set to 0.

    """
    joined = Bits.__add__(self, bs)
    joined._pos = 0
    return joined
3853
def read(self, fmt):
    """Interpret next bits according to the format string and return result.

    fmt -- Token string describing how to interpret the next bits.

    Token examples: 'int:12'    : 12 bits as a signed integer
                    'uint:8'    : 8 bits as an unsigned integer
                    'float:64'  : 8 bytes as a big-endian float
                    'intbe:16'  : 2 bytes as a big-endian signed integer
                    'uintbe:16' : 2 bytes as a big-endian unsigned integer
                    'intle:32'  : 4 bytes as a little-endian signed integer
                    'uintle:32' : 4 bytes as a little-endian unsigned integer
                    'floatle:64': 8 bytes as a little-endian float
                    'intne:24'  : 3 bytes as a native-endian signed integer
                    'uintne:24' : 3 bytes as a native-endian unsigned integer
                    'floatne:32': 4 bytes as a native-endian float
                    'hex:80'    : 80 bits as a hex string
                    'oct:9'     : 9 bits as an octal string
                    'bin:1'     : single bit binary string
                    'ue'        : next bits as unsigned exp-Golomb code
                    'se'        : next bits as signed exp-Golomb code
                    'uie'       : next bits as unsigned interleaved exp-Golomb code
                    'sie'       : next bits as signed interleaved exp-Golomb code
                    'bits:5'    : 5 bits as a bitstring
                    'bytes:10'  : 10 bytes as a bytes object
                    'bool'      : 1 bit as a bool
                    'pad:3'     : 3 bits of padding to ignore - returns None

    fmt may also be an integer, which will be treated like the 'bits' token.

    The position in the bitstring is advanced to after the read items.

    Raises ReadError if not enough bits are available.
    Raises ValueError if the format is not understood.

    """
    if isinstance(fmt, numbers.Integral):
        # Integer shortcut: return the next fmt bits as a bitstring.
        if fmt < 0:
            raise ValueError("Cannot read negative amount.")
        available = self.len - self._pos
        if fmt > available:
            raise ReadError("Cannot read {0} bits, only {1} available.",
                            fmt, available)
        bs = self._slice(self._pos, self._pos + fmt)
        self._pos += fmt
        return bs
    _, token = tokenparser(fmt)
    if len(token) != 1:
        # Nothing has been read yet, so the position is still untouched.
        raise ValueError("Format string should be a single token, not {0} "
                         "tokens - use readlist() instead.".format(len(token)))
    name, length, _ = token[0]
    if length is None:
        # No explicit length in the token: offer everything that is left.
        length = self.len - self._pos
    value, self._pos = self._readtoken(name, self._pos, length)
    return value
3910
def readlist(self, fmt, **kwargs):
    """Interpret next bits according to format string(s) and return list.

    fmt -- A single string or list of strings with comma separated tokens
           describing how to interpret the next bits in the bitstring. Items
           can also be integers, for reading new bitstring of the given length.
    kwargs -- A dictionary or keyword-value pairs - the keywords used in the
              format string will be replaced with their given value.

    The position in the bitstring is advanced to after the read items.

    Raises ReadError if not enough bits are available.
    Raises ValueError if the format is not understood.

    See the docstring for 'read' for token examples. 'pad' tokens are skipped
    and not added to the returned list.

    >>> h, b1, b2 = s.readlist('hex:20, bin:5, bin:3')
    >>> i, bs1, bs2 = s.readlist(['uint:12', 10, 10])

    """
    # _readlist() returns the values together with the position after them.
    value, self._pos = self._readlist(fmt, self._pos, **kwargs)
    return value
3934
def readto(self, bs, bytealigned=None):
    """Read up to and including next occurrence of bs and return result.

    bs -- The bitstring to find. An integer is not permitted.
    bytealigned -- If True the bitstring will only be
                   found on byte boundaries.

    Raises ValueError if bs is empty or an integer.
    Raises ReadError if bs is not found.

    """
    if isinstance(bs, numbers.Integral):
        raise ValueError("Integers cannot be searched for")
    bs = Bits(bs)
    oldpos = self._pos
    p = self.find(bs, self._pos, bytealigned=bytealigned)
    if not p:
        raise ReadError("Substring not found")
    # NOTE(review): this only lands just past the match if find() has already
    # moved the position to the start of the match. find() is defined outside
    # this part of the file - confirm.
    self._pos += bs.len
    return self._slice(oldpos, self._pos)
3955
def peek(self, fmt):
    """Interpret next bits according to format string and return result.

    fmt -- Token string describing how to interpret the next bits.

    This is read() followed by a restore of the position, so the position
    in the bitstring is not changed.

    Raises ReadError if not enough bits are available.
    Raises ValueError if the format is not understood.

    See the docstring for 'read' for token examples.

    """
    pos_before = self._pos
    value = self.read(fmt)
    self._pos = pos_before
    return value
3974
def peeklist(self, fmt, **kwargs):
    """Interpret next bits according to format string(s) and return list.

    fmt -- One or more strings with comma separated tokens describing
           how to interpret the next bits in the bitstring.
    kwargs -- A dictionary or keyword-value pairs - the keywords used in the
              format string will be replaced with their given value.

    This is readlist() followed by a restore of the position, so the
    position in the bitstring is not changed.

    Raises ReadError if not enough bits are available.
    Raises ValueError if the format is not understood.

    See the docstring for 'read' for token examples.

    """
    pos = self._pos
    return_values = self.readlist(fmt, **kwargs)
    self._pos = pos
    return return_values
3996
def bytealign(self):
    """Align to next byte and return number of skipped bits.

    Raises ValueError if the end of the bitstring is reached before
    aligning to the next byte.

    """
    # Bits needed to reach the next multiple of 8 (0 when already aligned).
    skipped = (8 - (self._pos % 8)) % 8
    # Assigning through the 'pos' property applies the range checks of
    # _setbitpos(), which is where the ValueError comes from.
    # NOTE(review): the move also adds self._offset, so for a non-zero
    # _offset the position advances by more than the returned 'skipped' -
    # confirm that this is intended.
    self.pos += self._offset + skipped
    assert self._assertsanity()
    return skipped
4008
# Public position attributes. 'pos' and 'bitpos' are two names for the same
# bit position; 'bytepos' is the byte view of it. All three go through the
# private getters/setters above.
pos = property(
    _getbitpos,
    _setbitpos,
    doc="""The position in the bitstring in bits. Read and write.
                      """)
bitpos = property(
    _getbitpos,
    _setbitpos,
    doc="""The position in the bitstring in bits. Read and write.
                      """)
bytepos = property(
    _getbytepos,
    _setbytepos,
    doc="""The position in the bitstring in bytes. Read and write.
                      """)
4018
4019
class BitStream(ConstBitStream, BitArray):
    """A container or stream holding a mutable sequence of bits

    Subclass of the ConstBitStream and BitArray classes. Inherits all of
    their methods.

    Methods:

    all() -- Check if all specified bits are set to 1 or 0.
    any() -- Check if any of specified bits are set to 1 or 0.
    append() -- Append a bitstring.
    bytealign() -- Align to next byte boundary.
    byteswap() -- Change byte endianness in-place.
    count() -- Count the number of bits set to 1 or 0.
    cut() -- Create generator of constant sized chunks.
    endswith() -- Return whether the bitstring ends with a sub-string.
    find() -- Find a sub-bitstring in the current bitstring.
    findall() -- Find all occurrences of a sub-bitstring in the current bitstring.
    insert() -- Insert a bitstring.
    invert() -- Flip bit(s) between one and zero.
    join() -- Join bitstrings together using current bitstring.
    overwrite() -- Overwrite a section with a new bitstring.
    peek() -- Peek at and interpret next bits as a single item.
    peeklist() -- Peek at and interpret next bits as a list of items.
    prepend() -- Prepend a bitstring.
    read() -- Read and interpret next bits as a single item.
    readlist() -- Read and interpret next bits as a list of items.
    replace() -- Replace occurrences of one bitstring with another.
    reverse() -- Reverse bits in-place.
    rfind() -- Seek backwards to find a sub-bitstring.
    rol() -- Rotate bits to the left.
    ror() -- Rotate bits to the right.
    set() -- Set bit(s) to 1 or 0.
    split() -- Create generator of chunks split by a delimiter.
    startswith() -- Return whether the bitstring starts with a sub-bitstring.
    tobytes() -- Return bitstring as bytes, padding if needed.
    tofile() -- Write bitstring to file, padding if needed.
    unpack() -- Interpret bits using format string.

    Special methods:

    Mutating operators are available: [], <<=, >>=, +=, *=, &=, |= and ^=
    in addition to [], ==, !=, +, *, ~, <<, >>, &, | and ^.

    Properties:

    bin -- The bitstring as a binary string.
    bool -- For single bit bitstrings, interpret as True or False.
    bytepos -- The current byte position in the bitstring.
    bytes -- The bitstring as a bytes object.
    float -- Interpret as a floating point number.
    floatbe -- Interpret as a big-endian floating point number.
    floatle -- Interpret as a little-endian floating point number.
    floatne -- Interpret as a native-endian floating point number.
    hex -- The bitstring as a hexadecimal string.
    int -- Interpret as a two's complement signed integer.
    intbe -- Interpret as a big-endian signed integer.
    intle -- Interpret as a little-endian signed integer.
    intne -- Interpret as a native-endian signed integer.
    len -- Length of the bitstring in bits.
    oct -- The bitstring as an octal string.
    pos -- The current bit position in the bitstring.
    se -- Interpret as a signed exponential-Golomb code.
    ue -- Interpret as an unsigned exponential-Golomb code.
    sie -- Interpret as a signed interleaved exponential-Golomb code.
    uie -- Interpret as an unsigned interleaved exponential-Golomb code.
    uint -- Interpret as a two's complement unsigned integer.
    uintbe -- Interpret as a big-endian unsigned integer.
    uintle -- Interpret as a little-endian unsigned integer.
    uintne -- Interpret as a native-endian unsigned integer.

    """

    # No attributes of its own: the position slot comes from ConstBitStream.
    __slots__ = ()

    # As BitStream objects are mutable, we shouldn't allow them to be hashed.
    __hash__ = None

    def __init__(self, auto=None, length=None, offset=None, **kwargs):
        """Either specify an 'auto' initialiser:
        auto -- a string of comma separated tokens, an integer, a file object,
                a bytearray, a boolean iterable or another bitstring.

        Or initialise via **kwargs with one (and only one) of:
        bytes -- raw data as a string, for example read from a binary file.
        bin -- binary string representation, e.g. '0b001010'.
        hex -- hexadecimal string representation, e.g. '0x2ef'
        oct -- octal string representation, e.g. '0o777'.
        uint -- an unsigned integer.
        int -- a signed integer.
        float -- a floating point number.
        uintbe -- an unsigned big-endian whole byte integer.
        intbe -- a signed big-endian whole byte integer.
        floatbe -- a big-endian floating point number.
        uintle -- an unsigned little-endian whole byte integer.
        intle -- a signed little-endian whole byte integer.
        floatle -- a little-endian floating point number.
        uintne -- an unsigned native-endian whole byte integer.
        intne -- a signed native-endian whole byte integer.
        floatne -- a native-endian floating point number.
        se -- a signed exponential-Golomb code.
        ue -- an unsigned exponential-Golomb code.
        sie -- a signed interleaved exponential-Golomb code.
        uie -- an unsigned interleaved exponential-Golomb code.
        bool -- a boolean (True or False).
        filename -- a file which will be opened in binary read-only mode.

        Other keyword arguments:
        length -- length of the bitstring in bits, if needed and appropriate.
                  It must be supplied for all integer and float initialisers.
        offset -- bit offset to the data. These offset bits are
                  ignored and this is intended for use when
                  initialising using 'bytes' or 'filename'.

        """
        # The arguments are consumed by __new__ (via _initialise); here the
        # stream is only rewound to bit 0.
        self._pos = 0
        # For mutable BitStreams we always read in files to memory:
        if not isinstance(self._datastore, ByteStore):
            self._ensureinmemory()

    def __new__(cls, auto=None, length=None, offset=None, **kwargs):
        """Allocate the instance and pass every argument on to _initialise()."""
        x = super(BitStream, cls).__new__(cls)
        x._initialise(auto, length, offset, **kwargs)
        return x

    def __copy__(self):
        """Return a new copy of the BitStream, positioned at bit 0."""
        s_copy = BitStream()
        s_copy._pos = 0
        if not isinstance(self._datastore, ByteStore):
            # Let them both point to the same (invariant) array.
            # If either gets modified then at that point they'll be read into memory.
            s_copy._datastore = self._datastore
        else:
            # In-memory data: give the copy its own slice-copy of the raw array.
            s_copy._datastore = ByteStore(self._datastore._rawarray[:],
                                          self._datastore.bitlength,
                                          self._datastore.offset)
        return s_copy

    def prepend(self, bs):
        """Prepend a bitstring to the current bitstring.

        bs -- The bitstring to prepend.

        The position is moved on by the length of bs, so it still refers to
        the same bit of the original data.

        """
        bs = self._converttobitstring(bs)
        self._prepend(bs)
        self._pos += bs.len
4168
4169
def pack(fmt, *values, **kwargs):
    """Pack the values according to the format string and return a new BitStream.

    fmt -- A single string or a list of strings with comma separated tokens
           describing how to create the BitStream.
    values -- Zero or more values to pack according to the format.
    kwargs -- A dictionary or keyword-value pairs - the keywords used in the
              format string will be replaced with their given value.

    Token examples: 'int:12'    : 12 bits as a signed integer
                    'uint:8'    : 8 bits as an unsigned integer
                    'float:64'  : 8 bytes as a big-endian float
                    'intbe:16'  : 2 bytes as a big-endian signed integer
                    'uintbe:16' : 2 bytes as a big-endian unsigned integer
                    'intle:32'  : 4 bytes as a little-endian signed integer
                    'uintle:32' : 4 bytes as a little-endian unsigned integer
                    'floatle:64': 8 bytes as a little-endian float
                    'intne:24'  : 3 bytes as a native-endian signed integer
                    'uintne:24' : 3 bytes as a native-endian unsigned integer
                    'floatne:32': 4 bytes as a native-endian float
                    'hex:80'    : 80 bits as a hex string
                    'oct:9'     : 9 bits as an octal string
                    'bin:1'     : single bit binary string
                    'ue' / 'uie': next bits as unsigned exp-Golomb code
                    'se' / 'sie': next bits as signed exp-Golomb code
                    'bits:5'    : 5 bits as a bitstring object
                    'bytes:10'  : 10 bytes as a bytes object
                    'bool'      : 1 bit as a bool
                    'pad:3'     : 3 zero bits as padding

    Raises CreationError if the format cannot be parsed, or if too few or
    too many values are supplied for it.

    >>> s = pack('uint:12, bits', 100, '0xffe')
    >>> t = pack(['bits', 'bin:3'], s, '111')
    >>> u = pack('uint:8=a, uint:8=b, uint:55=a', a=6, b=44)

    """
    # A lone format string is treated as a one-item list of formats.
    format_items = [fmt] if isinstance(fmt, basestring) else fmt
    keyword_names = tuple(sorted(kwargs.keys()))
    tokens = []
    try:
        for format_item in format_items:
            _, parsed = tokenparser(format_item, keyword_names)
            tokens.extend(parsed)
    except ValueError as e:
        raise CreationError(*e.args)

    remaining = iter(values)
    stream = BitStream()
    try:
        for name, length, value in tokens:
            # Keyword arguments take precedence for both value and length.
            value = kwargs.get(value, value)
            length = kwargs.get(length, length)
            # A bare keyword name stands for the keyword's value itself.
            if name in kwargs and length is None and value is None:
                stream.append(kwargs[name])
                continue
            if length is not None:
                length = int(length)
            if value is None and name != 'pad':
                # Take the next value from the positional ones provided.
                value = next(remaining)
            stream._append(BitStream._init_with_token(name, length, value))
    except StopIteration:
        raise CreationError("Not enough parameters present to pack according to the "
                            "format. {0} values are needed.", len(tokens))

    # Every positional value must have been consumed by now.
    exhausted = object()
    if next(remaining, exhausted) is exhausted:
        return stream
    raise CreationError("Too many parameters present to pack according to the format.")
4243
4244
# Older names kept as aliases for backward compatibility.
ConstBitArray = Bits
BitString = BitStream

# Names exported by 'from bitstring import *'.
__all__ = [
    'ConstBitArray',
    'ConstBitStream',
    'BitStream',
    'BitArray',
    'Bits',
    'BitString',
    'pack',
    'Error',
    'ReadError',
    'InterpretError',
    'ByteAlignError',
    'CreationError',
    'bytealigned',
]
def byteswap(self, fmt=None, start=None, end=None, repeat=True)
Definition: bitstring.py:3566
def ror(self, bits, start=None, end=None)
Definition: bitstring.py:3522
def reverse(self, start=None, end=None)
Definition: bitstring.py:3449
def replace(self, old, new, start=None, end=None, count=None, bytealigned=None)
Definition: bitstring.py:3312
def invert(self, pos=None)
Definition: bitstring.py:3499
def overwrite(self, bs, pos=None)
Definition: bitstring.py:3403
def __delitem__(self, key)
Definition: bitstring.py:3200
def set(self, value, pos=None)
Definition: bitstring.py:3469
def insert(self, bs, pos=None)
Definition: bitstring.py:3378
def rol(self, bits, start=None, end=None)
Definition: bitstring.py:3544
def __init__(self, auto=None, length=None, offset=None, **kwargs)
Definition: bitstring.py:3025
def __setitem__(self, key, value)
Definition: bitstring.py:3092
def __init__(self, auto=None, length=None, offset=None, **kwargs)
Definition: bitstring.py:4098
def __contains__(self, bs)
Definition: bitstring.py:1157
def _invert(self, pos)
Definition: bitstring.py:2184
def _readuint(self, length, start)
Definition: bitstring.py:1399
def _setauto(self, s, length, offset)
Definition: bitstring.py:1256
def _readint(self, length, start)
Definition: bitstring.py:1444
def _truncatestart(self, bits)
Definition: bitstring.py:2055
def __mul__(self, n)
Definition: bitstring.py:1055
def _readbits(self, length, start)
Definition: bitstring.py:2249
def _readoct(self, length, start)
Definition: bitstring.py:1891
def _findregex(self, reg_ex, start, end, bytealigned)
Definition: bitstring.py:2392
def _initialise_from_auto(self, auto, length, offset)
Definition: bitstring.py:844
def _setuie(self, i)
Definition: bitstring.py:1728
def _setbytes_safe(self, data, length=None, offset=0)
Definition: bitstring.py:1325
def _readintbe(self, length, start)
Definition: bitstring.py:1483
def _unset(self, pos)
Definition: bitstring.py:2179
def _imul(self, n)
Definition: bitstring.py:2210
def __lshift__(self, n)
Definition: bitstring.py:1024
def _setbin_safe(self, binstring)
Definition: bitstring.py:1834
def find(self, bs, start=None, end=None, bytealigned=None)
Definition: bitstring.py:2424
def _ior(self, bs)
Definition: bitstring.py:2240
def findall(self, bs, start=None, end=None, count=None, bytealigned=None)
Definition: bitstring.py:2462
def _setfile(self, filename, length, offset)
Definition: bitstring.py:1310
def _readintle(self, length, start)
Definition: bitstring.py:1539
def _readfloat(self, length, start)
Definition: bitstring.py:1568
def __and__(self, bs)
Definition: bitstring.py:1079
def _truncateend(self, bits)
Definition: bitstring.py:2068
def _append(self, bs)
Definition: bitstring.py:2035
def __ror__(self, bs)
Definition: bitstring.py:1121
def _iand(self, bs)
Definition: bitstring.py:2243
def _setintbe(self, intbe, length=None)
Definition: bitstring.py:1476
def _readuie(self, pos)
Definition: bitstring.py:1739
def __ne__(self, bs)
Definition: bitstring.py:1003
def _reversebytes(self, start, end)
Definition: bitstring.py:2161
def __rshift__(self, n)
Definition: bitstring.py:1039
def _readhex(self, length, start)
Definition: bitstring.py:1928
def _readue(self, pos)
Definition: bitstring.py:1651
def _validate_slice(self, start, end)
Definition: bitstring.py:2253
def rfind(self, bs, start=None, end=None, bytealigned=None)
Definition: bitstring.py:2514
def _set(self, pos)
Definition: bitstring.py:2174
def _irshift(self, n)
Definition: bitstring.py:2203
def _setbin_unsafe(self, binstring)
Definition: bitstring.py:1841
def __rand__(self, bs)
Definition: bitstring.py:1095
def __rxor__(self, bs)
Definition: bitstring.py:1147
def _findbytes(self, bytes_, start, end, bytealigned)
Definition: bitstring.py:2365
def __radd__(self, bs)
Definition: bitstring.py:884
def cut(self, bits, start=None, end=None, count=None)
Definition: bitstring.py:2553
def _setse(self, i)
Definition: bitstring.py:1690
def _readsie(self, pos)
Definition: bitstring.py:1795
def _readbytes(self, length, start)
Definition: bitstring.py:1346
def _setoct(self, octstring)
Definition: bitstring.py:1876
def split(self, delimiter, start=None, end=None, count=None, bytealigned=None)
Definition: bitstring.py:2581
def _ixor(self, bs)
Definition: bitstring.py:2246
def any(self, value, pos=None)
Definition: bitstring.py:2754
def _readse(self, pos)
Definition: bitstring.py:1712
def tofile(self, f)
Definition: bitstring.py:2668
def _readfloatle(self, length, start)
Definition: bitstring.py:1606
def __getitem__(self, key)
Definition: bitstring.py:893
def _setue(self, i)
Definition: bitstring.py:1629
def _delete(self, bits, pos)
Definition: bitstring.py:2135
def startswith(self, prefix, start=None, end=None)
Definition: bitstring.py:2702
def __eq__(self, bs)
Definition: bitstring.py:990
def _sethex(self, hexstring)
Definition: bitstring.py:1910
def _slice(self, start, end)
Definition: bitstring.py:2008
def _readbin(self, length, start)
Definition: bitstring.py:1855
def _insert(self, bs, pos)
Definition: bitstring.py:2081
def _setuintbe(self, uintbe, length=None)
Definition: bitstring.py:1458
def __add__(self, bs)
Definition: bitstring.py:868
def endswith(self, suffix, start=None, end=None)
Definition: bitstring.py:2717
def __xor__(self, bs)
Definition: bitstring.py:1131
def _prepend(self, bs)
Definition: bitstring.py:2039
def __rmul__(self, n)
Definition: bitstring.py:1070
def _ensureinmemory(self)
Definition: bitstring.py:1960
def _readuintbe(self, length, start)
Definition: bitstring.py:1465
def _readlist(self, fmt, pos, **kwargs)
Definition: bitstring.py:2288
def __or__(self, bs)
Definition: bitstring.py:1105
def count(self, value)
Definition: bitstring.py:2776
def all(self, value, pos=None)
Definition: bitstring.py:2732
def __init__(self, auto=None, length=None, offset=None, **kwargs)
Definition: bitstring.py:742
def _converttobitstring(cls, bs, offset=0, cache=None)
Definition: bitstring.py:1966
def _inplace_logical_helper(self, bs, f)
Definition: bitstring.py:2224
def _readuintle(self, length, start)
Definition: bitstring.py:1501
def _setuint(self, uint, length=None)
Definition: bitstring.py:1362
def _setsie(self, i)
Definition: bitstring.py:1773
def _setint(self, int_, length=None)
Definition: bitstring.py:1421
def _ilshift(self, n)
Definition: bitstring.py:2196
def _overwrite(self, bs, pos)
Definition: bitstring.py:2102
def _readtoken(self, name, pos, length)
Definition: bitstring.py:2019
def _setbytes_unsafe(self, data, length, offset)
Definition: bitstring.py:1341
def readto(self, bs, bytealigned=None)
Definition: bitstring.py:3935
def peeklist(self, fmt, **kwargs)
Definition: bitstring.py:3975
def readlist(self, fmt, **kwargs)
Definition: bitstring.py:3911
def _setbytepos(self, bytepos)
Definition: bitstring.py:3808
def __init__(self, auto=None, length=None, offset=None, **kwargs)
Definition: bitstring.py:3764
def getbyteslice(self, start, end)
Definition: bitstring.py:159
def _prependstore(self, store)
Definition: bitstring.py:191
def _appendstore(self, store)
Definition: bitstring.py:175
def __init__(self, data, bitlength=None, offset=None)
Definition: bitstring.py:140
def equal(a, b)
Definition: bitstring.py:292
def tokenparser(fmt, keys=None, token_cache=None)
Definition: bitstring.py:540
def tidy_input_string(s)
Definition: bitstring.py:463
def expand_brackets(s)
Definition: bitstring.py:648
def structparser(token)
Definition: bitstring.py:511
def offsetcopy(s, newoffset)
Definition: bitstring.py:250
Definition: unpack.py:1