13 files changed, 562 insertions, 104 deletions
diff --git a/docs/Makefile b/docs/Makefile
index 80b61b62..e0ddc37d 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -1,4 +1,4 @@
-all: index.html encoding.html
+all: index.html encoding.html concepts.html reference.html
 
 %.html: %.rst
 	rst2html --stylesheet=lsr.css --link-stylesheet $< $@
 \ No newline at end of file
diff --git a/docs/concepts.rst b/docs/concepts.rst
index 30daed01..f2475aec 100644
--- a/docs/concepts.rst
+++ b/docs/concepts.rst
@@ -4,24 +4,9 @@ Nanopb: Basic concepts
 
 The things outlined here are common to both the encoder and the decoder part.
 
-Return values and error handling
-================================
-
-Most functions in nanopb return *bool*. *True* means success, *false* means failure.
-
-Because code size is of the essence, nanopb doesn't give any information about the cause of the error. However, there are few possible sources of errors:
-
-1) Running out of memory. Because everything is allocated from the stack, nanopb can't detect this itself. Encoding or decoding the same type of a message always takes the same amount of stack space. Therefore, if it works once, it works always.
-2) Invalid field description. These are usually stored as constants, so if it works under the debugger, it always does.
-3) IO errors in your own stream callbacks. Because encoding/decoding stops at the first error, you can overwrite the *state* field in the struct and store your own error code there.
-4) Errors in your callback functions. You can use the state field in the callback structure.
-5) Exceeding the max_size or bytes_left of a stream.
-6) Exceeding the max_size of a string or array field
-7) Invalid protocol buffers binary message. It's not like you could recover from it anyway, so a simple failure should be enough.
+.. sectnum::
 
-In my opinion, it is enough that 1) and 2) can be resolved using a debugger.
-
-However, you may be interested which of the remaining conditions caused the error. For 3) and 4), you can check the state. If you have to detect 5) and 6), you should convert the fields to callback type. Any remaining problem is of type 7).
+.. contents::
 
 Streams
 =======
@@ -33,7 +18,7 @@ There are a few generic rules for callback functions:
 
 #) Return false on IO errors. The encoding or decoding process will abort immediately.
 #) Use state to store your own data, such as a file descriptor.
-#) *bytes_written* and *bytes_left* are updated by *pb_write* and *pb_read*. Don't touch them.
+#) *bytes_written* and *bytes_left* are updated by pb_write and pb_read.
 #) Your callback may be used with substreams. In this case *bytes_left*, *bytes_written* and *max_size* have smaller values than the original stream. Don't use these values to calculate pointers.
 
 Output streams
@@ -51,9 +36,7 @@ Output streams
 
 The *callback* for output stream may be NULL, in which case the stream simply counts the number of bytes written. In this case, *max_size* is ignored.
 
-Otherwise, if *bytes_written* + bytes_to_be_written is larger than *max_size*, *pb_write* returns false before doing anything else. If you don't want to limit the size of the stream, pass SIZE_MAX.
-
-Most commonly you want to initialize *bytes_written* to 0. It doesn't matter to the library, though.
+Otherwise, if *bytes_written* + bytes_to_be_written is larger than *max_size*, pb_write returns false before doing anything else. If you don't want to limit the size of the stream, pass SIZE_MAX.
  
 **Example 1:**
 
@@ -68,21 +51,22 @@ This is the way to get the size of the message without storing it anywhere::
 
 Writing to stdout::
 
- bool streamcallback(pb_ostream_t *stream, const uint8_t *buf, size_t count)
+ bool callback(pb_ostream_t *stream, const uint8_t *buf, size_t count)
  {
     FILE *file = (FILE*) stream->state;
     return fwrite(buf, 1, count, file) == count;
  }
  
- pb_ostream_t stdoutstream = {&streamcallback, stdout, SIZE_MAX, 0};
+ pb_ostream_t stdoutstream = {&callback, stdout, SIZE_MAX, 0};
 
 Input streams
 -------------
 For input streams, there are a few extra rules:
+
 #) If buf is NULL, read from stream but don't store the data. This is used to skip unknown input.
 #) You don't need to know the length of the message in advance. After getting EOF error when reading, set bytes_left to 0 and return false. Pb_decode will detect this and if the EOF was in a proper position, it will return true.
 
-::
+Here is the structure::
 
  struct _pb_istream_t
  {
@@ -91,8 +75,109 @@ For input streams, there are a few extra rules:
     size_t bytes_left;
  };
 
-The *callback* must always be a function pointer.
+The *callback* must always be a function pointer. *Bytes_left* is an upper limit on the number of bytes that will be read. You can use SIZE_MAX if your callback handles EOF as described above.
+
+**Example:**
+
+This function binds an input stream to stdin:
+
+:: 
+
+ bool callback(pb_istream_t *stream, uint8_t *buf, size_t count)
+ {
+    FILE *file = (FILE*)stream->state;
+    bool status;
+    
+    if (buf == NULL)
+    {
+        while (count && fgetc(file) != EOF) count--;
+        return count == 0;
+    }
+    
+    status = (fread(buf, 1, count, file) == count);
+    
+    if (feof(file))
+        stream->bytes_left = 0;
+    
+    return status;
+ }
+ 
+ pb_istream_t stdinstream = {&callback, stdin, SIZE_MAX};
+
+Data types
+==========
+
+Most Protocol Buffers datatypes have directly corresponding C datatypes: for example, int32 maps to int32_t, float to float and bool to bool. However, the variable-length datatypes are more complex:
+
+1) Strings, bytes and repeated fields of any type map to callback functions by default.
+2) If there is a special option *(nanopb).max_size* specified in the .proto file, string maps to a null-terminated char array and bytes maps to a structure containing a byte array and a size field.
+3) If there is a special option *(nanopb).max_count* specified on a repeated field, it maps to an array of whatever type is being repeated. Another field will be created for the actual number of entries stored.
+
+=============================================================================== =======================
+      field in .proto                                                           autogenerated in .h
+=============================================================================== =======================
+required string name = 1;                                                       pb_callback_t name;
+required string name = 1 [(nanopb).max_size = 40];                              char name[40];
+repeated string name = 1 [(nanopb).max_size = 40];                              pb_callback_t name;
+repeated string name = 1 [(nanopb).max_size = 40, (nanopb).max_count = 5];      | size_t name_count;
+                                                                                | char name[5][40];
+required bytes data = 1 [(nanopb).max_size = 40];                               | typedef struct {
+                                                                                |    size_t size;
+                                                                                |    uint8_t bytes[40];
+                                                                                | } Person_data_t;
+                                                                                | Person_data_t data;
+=============================================================================== =======================
+
+The maximum lengths are checked at runtime. If a string, bytes or array field exceeds the allocated length, *pb_decode* will return false.
+
+For more information about callbacks, see the `Encoding` and `Decoding` sections.
+
+Field description array
+=======================
+
+To use the *pb_encode* and *pb_decode* functions, you need an array of pb_field_t constants describing the structure you wish to encode. This description is usually autogenerated from the .proto file.
+
+::
+
+ message PhoneNumber {
+    required string number = 1 [(nanopb).max_size = 40];
+    optional PhoneType type = 2 [default = HOME];
+ }
+
+::
+
+ const pb_field_t Person_PhoneNumber_fields[3] = {
+    {1, PB_HTYPE_REQUIRED | PB_LTYPE_STRING,
+    offsetof(Person_PhoneNumber, number), 0,
+    pb_membersize(Person_PhoneNumber, number), 0, 0},
+
+    {2, PB_HTYPE_OPTIONAL | PB_LTYPE_VARINT,
+    pb_delta_end(Person_PhoneNumber, type, number),
+    pb_delta(Person_PhoneNumber, has_type, type),
+    pb_membersize(Person_PhoneNumber, type), 0,
+    &Person_PhoneNumber_type_default},
+
+    PB_LAST_FIELD
+ };
+
+For more information about the format, see the `Generated code` section.
+
+
+Return values and error handling
+================================
+
+Most functions in nanopb return bool: *true* means success, *false* means failure. If this is enough for you, skip this section.
+
+For simplicity, nanopb doesn't define its own error codes. This might be added if there is a compelling need for it. You can however deduce something about the error causes:
+
+1) Running out of memory. Because everything is allocated from the stack, nanopb can't detect this itself. Encoding or decoding the same type of a message always takes the same amount of stack space. Therefore, if it works once, it works always.
+2) Invalid field description. These are usually stored as constants, so if it works under the debugger, it always does.
+3) IO errors in your own stream callbacks. Because encoding/decoding stops at the first error, you can overwrite the *state* field in the struct and store your own error code there.
+4) Errors that happen in your callback functions. You can use the state field in the callback structure.
+5) Exceeding the max_size or bytes_left of a stream.
+6) Exceeding the max_size of a string or array field.
+7) Invalid protocol buffers binary message. It's not like you could recover from it anyway, so a simple failure should be enough.
 
-*Bytes_left* is an upper limit on the number of bytes that will be read. You can use SIZE_MAX if your callback handles EOF as described above.
+In my opinion, it is enough that 1. and 2. can be resolved using a debugger.
 
-**Example**
-\ No newline at end of file
+However, you may be interested in which of the remaining conditions caused the error. For 3. and 4., you can set and check the state. If you have to detect 5. and 6., you should convert the fields to callback type. Any remaining problem is of type 7.
diff --git a/docs/encoding.rst b/docs/encoding.rst
index e4e0cd7f..3f673f35 100644
--- a/docs/encoding.rst
+++ b/docs/encoding.rst
@@ -4,36 +4,15 @@ Nanopb: Encoding messages
 
 The basic way to encode messages is to:
 
-1) Write a callback function for whatever stream you want to write the message to.
+1) Create an `output stream`_.
 2) Fill a structure with your data.
-3) Call pb_encode with the stream, a pointer to *const pb_field_t* array and a pointer to your structure.
+3) Call *pb_encode* with the stream, a pointer to *const pb_field_t* array and a pointer to your structure.
 
 A few extra steps are necessary if you need to know the size of the message beforehand, or if you have dynamically sized fields.
 
-Output streams
-==============
+.. _`output stream`: concepts.html#output-streams
 
-This is the contents of *pb_ostream_t* structure::
+Function: pb_encode
+===================
 
- typedef struct _pb_ostream_t pb_ostream_t;
- struct _pb_ostream_t
- {
-    bool (*callback)(pb_ostream_t *stream, const uint8_t *buf, size_t count);
-    void *state;
-    size_t max_size;
-    size_t bytes_written;
- };
 
-This, combined with the pb_write function, provides a light-weight abstraction
-for whatever destination you want to write data to.
-
-*callback* should be a pointer to your callback function. These are the rules for it:
-
-1) Return false on IO errors. This will cause encoding to abort.
- * 
- * 2) You can use state to store your own data (e.g. buffer pointer).
- * 
- * 3) pb_write will update bytes_written after your callback runs.
- * 
- * 4) Substreams will modify max_size and bytes_written. Don't use them to
- * calculate any pointers.
-\ No newline at end of file
diff --git a/docs/index.rst b/docs/index.rst
index 5a5cc826..0ae6f0fd 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -44,6 +44,7 @@ Features and limitations
 #) Some speed has been sacrificed for code size. For example varint calculations are always done in 64 bits.
 #) Encoding is focused on writing to streams. For memory buffers only it could be made more efficient.
 #) The deprecated Protocol Buffers feature called "groups" is not supported.
+#) Fields in the generated structs are ordered by the tag number, instead of the natural ordering in the .proto file.
 
 Getting started
 ===============
diff --git a/docs/lsr.css b/docs/lsr.css
index 081bf068..81badb23 100644
--- a/docs/lsr.css
+++ b/docs/lsr.css
@@ -83,7 +83,8 @@ em {
 
 pre {
   border-left: 3px double #aaa;
-  padding-left: 10px;
+  padding: 5px 10px;
+  background-color: #f6f6f6;
 }
 
 h1.title {
@@ -202,14 +203,18 @@ table.docutils td, table.docutils th {
   padding: 0.25em 0.5em;
 }
 
+th.field-name {
+   text-align: right;
+   width: 15em;
+}
+
 table.docutils th {
-  background-color: #dddddd;
+  font-family: monospace;
+  background-color: #f6f6f6;
 }
 
 div.sidebar {
-  width: 33%;
-  float: right;
-  margin: 0em 2em;
+  margin: 0em 2em 2em 0em;
   padding: 0em 1em;
   border-top: 1px solid #aaa;
   border-left: 1px solid #aaa;
diff --git a/docs/reference.rst b/docs/reference.rst
new file mode 100644
index 00000000..4c8c874e
--- /dev/null
+++ b/docs/reference.rst
@@ -0,0 +1,385 @@
+=====================
+Nanopb: API reference
+=====================
+
+.. contents ::
+
+pb.h
+====
+
+pb_type_t
+---------
+Defines the encoder/decoder behaviour that should be used for a field. ::
+
+    typedef enum { ... } pb_type_t;
+
+The low-order nibble of the enumeration values defines the function that can be used for encoding and decoding the field data:
+
+==================== ===== ================================================
+LTYPE identifier     Value Storage format
+==================== ===== ================================================
+PB_LTYPE_VARINT      0x00  Integer.
+PB_LTYPE_SVARINT     0x01  Integer, zigzag encoded.
+PB_LTYPE_FIXED       0x02  Integer or floating point.
+PB_LTYPE_BYTES       0x03  Structure with *size_t* field and byte array.
+PB_LTYPE_STRING      0x04  Null-terminated string.
+PB_LTYPE_SUBMESSAGE  0x05  Submessage structure.
+==================== ===== ================================================
+
+The high-order nibble defines whether the field is required, optional, repeated or callback:
+
+==================== ===== ================================================
+HTYPE identifier     Value Field handling
+==================== ===== ================================================
+PB_HTYPE_REQUIRED    0x00  Verify that field exists in decoded message.
+PB_HTYPE_OPTIONAL    0x10  Use separate *has_<field>* boolean to specify
+                           whether the field is present.
+PB_HTYPE_ARRAY       0x20  A repeated field with preallocated array.
+PB_HTYPE_CALLBACK    0x30  A field with dynamic storage size, data is
+                           actually a pointer to a structure containing a
+                           callback function.
+==================== ===== ================================================
+
+pb_field_t
+----------
+Describes a single structure field with memory position in relation to others. ::
+
+    typedef struct _pb_field_t pb_field_t;
+    struct _pb_field_t {
+        uint8_t tag;
+        pb_type_t type;
+        uint8_t data_offset;
+        int8_t size_offset;
+        uint8_t data_size;
+        uint8_t array_size;
+        const void *ptr;
+    } pb_packed;
+
+:tag:           Tag number of the field or 0 to terminate a list of fields.
+:type:          LTYPE and HTYPE of the field.
+:data_offset:   Offset of field data, relative to the end of the previous field.
+:size_offset:   Offset of *bool* flag for optional fields or *size_t* count for arrays, relative to field data.
+:data_size:     Size of a single data entry, in bytes.
+:array_size:    Maximum number of entries in an array, if it is an array type.
+:ptr:           Pointer to default value for optional fields, or to submessage description for PB_LTYPE_SUBMESSAGE.
+
+pb_encode.h
+===========
+
+pb_ostream_from_buffer
+----------------------
+Constructs an output stream for writing into a memory buffer. This is just a helper function; it doesn't do anything you couldn't do yourself in a callback function. It uses an internal callback that stores the pointer in the stream's *state* field. ::
+
+    pb_ostream_t pb_ostream_from_buffer(uint8_t *buf, size_t bufsize);
+
+:buf:           Memory buffer to write into.
+:bufsize:       Maximum number of bytes to write.
+:returns:       An output stream.
+
+After writing, you can check *stream.bytes_written* to find out how much valid data there is in the buffer.
+
+pb_write
+--------
+Writes data to an output stream. Always use this function, instead of trying to call stream callback manually. ::
+
+    bool pb_write(pb_ostream_t *stream, const uint8_t *buf, size_t count);
+
+:stream:        Output stream to write to.
+:buf:           Pointer to buffer with the data to be written.
+:count:         Number of bytes to write.
+:returns:       True on success, false if maximum length is exceeded or an IO error happens.
+
+If an error happens, *bytes_written* is not incremented. Depending on the callback used, calling pb_write again after it has failed once may be dangerous. Nanopb itself never does this, instead it returns the error to user application. The builtin pb_ostream_from_buffer is safe to call again after failed write.
+
+pb_encode
+---------
+Encodes the contents of a structure as a protocol buffers message and writes it to output stream. ::
+
+    bool pb_encode(pb_ostream_t *stream, const pb_field_t fields[], const void *src_struct);
+
+:stream:        Output stream to write to.
+:fields:        A field description array, usually autogenerated.
+:src_struct:    Pointer to the data that will be serialized.
+:returns:       True on success, false on IO error, on detectable errors in field description, or if a field encoder returns false.
+
+Normally pb_encode simply walks through the fields description array and serializes each field in turn. However, submessages must be serialized twice: first to calculate their size and then to actually write them to output. This causes some constraints for callback fields, which must return the same data on every call.
+
+pb_encode_varint
+----------------
+Encodes an unsigned integer in the varint_ format. ::
+
+    bool pb_encode_varint(pb_ostream_t *stream, uint64_t value);
+
+:stream:        Output stream to write to. 1-10 bytes will be written.
+:value:         Value to encode.
+:returns:       True on success, false on IO error.
+
+.. _varint: http://code.google.com/apis/protocolbuffers/docs/encoding.html#varints
+
+pb_encode_tag
+-------------
+Starts a field in the Protocol Buffers binary format: encodes the field number and the wire type of the data. ::
+
+    bool pb_encode_tag(pb_ostream_t *stream, pb_wire_type_t wiretype, int field_number);
+
+:stream:        Output stream to write to. 1-5 bytes will be written.
+:wiretype:      PB_WT_VARINT, PB_WT_64BIT, PB_WT_STRING or PB_WT_32BIT
+:field_number:  Identifier for the field, defined in the .proto file.
+:returns:       True on success, false on IO error.
+
+pb_encode_tag_for_field
+-----------------------
+Same as `pb_encode_tag`_, except takes the parameters from a *pb_field_t* structure. ::
+
+    bool pb_encode_tag_for_field(pb_ostream_t *stream, const pb_field_t *field);
+
+:stream:        Output stream to write to. 1-5 bytes will be written.
+:field:         Field description structure. Usually autogenerated.
+:returns:       True on success, false on IO error or unknown field type.
+
+This function only considers the LTYPE of the field. You can use it from your field callbacks, because the source generator writes correct LTYPE also for callback type fields.
+
+pb_encode_string
+----------------
+Writes the length of a string as varint and then contents of the string. Used for writing fields with wire type PB_WT_STRING. ::
+
+    bool pb_encode_string(pb_ostream_t *stream, const uint8_t *buffer, size_t size);
+
+:stream:        Output stream to write to.
+:buffer:        Pointer to string data.
+:size:          Number of bytes in the string.
+:returns:       True on success, false on IO error.
+
+.. sidebar:: Field encoders
+
+    The functions with names beginning with *pb_enc_* are called field encoders. Each PB_LTYPE has its own field encoder, which handles translating from C data into Protocol Buffers data.
+
+    By using the *data_size* in the field description and by taking advantage of C casting rules, it has been possible to combine many data types to a single LTYPE. For example, *int32*, *uint32*, *int64*, *uint64*, *bool* and *enum* are all handled by *pb_enc_varint*.
+
+    Each field encoder only encodes the contents of the field. The tag must be encoded separately with `pb_encode_tag_for_field`_.
+
+    You can use the field encoders from your callbacks.
+
+pb_enc_varint
+-------------
+Field encoder for PB_LTYPE_VARINT. Takes the first *field->data_size* bytes from src, casts them as *uint64_t* and calls `pb_encode_varint`_. ::
+
+    bool pb_enc_varint(pb_ostream_t *stream, const pb_field_t *field, const void *src);
+
+:stream:        Output stream to write to.
+:field:         Field description structure. Only *data_size* matters.
+:src:           Pointer to start of the field data.
+:returns:       True on success, false on IO error.
+
+pb_enc_svarint
+--------------
+Field encoder for PB_LTYPE_SVARINT. Similar to `pb_enc_varint`_, except first zig-zag encodes the value for more efficient negative number encoding. ::
+
+    bool pb_enc_svarint(pb_ostream_t *stream, const pb_field_t *field, const void *src);
+
+(parameters are the same as for `pb_enc_varint`_)
+
+The number is considered negative if the high-order bit of the value is set. On big endian computers, it is the highest bit of *\*src*. On little endian computers, it is the highest bit of *\*(src + field->data_size - 1)*.
+
+pb_enc_fixed
+------------
+Field encoder for PB_LTYPE_FIXED. Writes the data in little endian order. On big endian computers, reverses the order of bytes. ::
+
+    bool pb_enc_fixed(pb_ostream_t *stream, const pb_field_t *field, const void *src);
+
+(parameters are the same as for `pb_enc_varint`_)
+
+The same function is used for integers, floats and doubles. This breaks encoding of double values on architectures where they are mixed endian (primarily some ARM processors with hardware FPU).
+
+pb_enc_bytes
+------------
+Field encoder for PB_LTYPE_BYTES. Just calls `pb_encode_string`_. ::
+
+    bool pb_enc_bytes(pb_ostream_t *stream, const pb_field_t *field, const void *src);
+
+:stream:        Output stream to write to.
+:field:         Not used.
+:src:           Pointer to a structure similar to pb_bytes_array_t.
+:returns:       True on success, false on IO error.
+
+This function expects a pointer to a structure with a *size_t* field at start, and a variable sized byte array after it. The platform-specific field offset is inferred from *pb_bytes_array_t*, which has a byte array of size 1.
+
+pb_enc_string
+-------------
+Field encoder for PB_LTYPE_STRING. Determines size of string with strlen() and then calls `pb_encode_string`_. ::
+
+    bool pb_enc_string(pb_ostream_t *stream, const pb_field_t *field, const void *src);
+
+:stream:        Output stream to write to.
+:field:         Not used.
+:src:           Pointer to a null-terminated string.
+:returns:       True on success, false on IO error.
+
+pb_enc_submessage
+-----------------
+Field encoder for PB_LTYPE_SUBMESSAGE. Calls `pb_encode`_ to perform the actual encoding. ::
+
+    bool pb_enc_submessage(pb_ostream_t *stream, const pb_field_t *field, const void *src);
+
+:stream:        Output stream to write to.
+:field:         Field description structure. The *ptr* field must be a pointer to a field description array for the submessage.
+:src:           Pointer to the structure where submessage data is.
+:returns:       True on success, false on IO errors, pb_encode errors or if submessage size changes between calls.
+
+In Protocol Buffers format, the submessage size must be written before the submessage contents. Therefore, this function has to encode the submessage twice in order to know the size beforehand.
+
+If the submessage contains callback fields, the callback function might misbehave and write out a different amount of data on the second call. This situation is recognized and *false* is returned, but it is up to the caller to ensure that the receiver of the message does not interpret it as valid data.
+
+pb_decode.h
+===========
+
+pb_istream_from_buffer
+----------------------
+Helper function for creating an input stream that reads data from a memory buffer. ::
+
+    pb_istream_t pb_istream_from_buffer(uint8_t *buf, size_t bufsize);
+
+:buf:           Pointer to byte array to read from.
+:bufsize:       Size of the byte array.
+:returns:       An input stream ready to use.
+
+pb_read
+-------
+Read data from input stream. Always use this function, don't try to call the stream callback directly. ::
+
+    bool pb_read(pb_istream_t *stream, uint8_t *buf, size_t count);
+
+:stream:        Input stream to read from.
+:buf:           Buffer to store the data to, or NULL to just read data without storing it anywhere.
+:count:         Number of bytes to read.
+:returns:       True on success, false if *stream->bytes_left* is less than *count* or if an IO error occurs.
+
+End of file is signalled by *stream->bytes_left* being zero after pb_read returns false.
+
+pb_decode_varint
+----------------
+Read and decode a varint_ encoded integer. ::
+
+    bool pb_decode_varint(pb_istream_t *stream, uint64_t *dest);
+
+:stream:        Input stream to read from. 1-10 bytes will be read.
+:dest:          Storage for the decoded integer. Value is undefined on error.
+:returns:       True on success, false if value exceeds uint64_t range or an IO error happens.
+
+pb_skip_varint
+--------------
+Skip a varint_ encoded integer without decoding it. ::
+
+    bool pb_skip_varint(pb_istream_t *stream);
+
+:stream:        Input stream to read from. Will read 1 byte at a time until the MSB is clear.
+:returns:       True on success, false on IO error.
+
+pb_skip_string
+--------------
+Skip a varint-length-prefixed string. This means skipping a value with wire type PB_WT_STRING. ::
+
+    bool pb_skip_string(pb_istream_t *stream);
+
+:stream:        Input stream to read from.
+:returns:       True on success, false on IO error or length exceeding uint32_t.
+
+pb_decode
+---------
+Read and decode all fields of a structure. Reads until EOF on input stream. ::
+
+    bool pb_decode(pb_istream_t *stream, const pb_field_t fields[], void *dest_struct);
+
+:stream:        Input stream to read from.
+:fields:        A field description array. Usually autogenerated.
+:dest_struct:   Pointer to structure where data will be stored.
+:returns:       True on success, false on IO error, on detectable errors in field description, if a field decoder returns false or if a required field is missing.
+
+In Protocol Buffers binary format, EOF is only allowed between fields. If it happens anywhere else, pb_decode will return *false*.
+
+In addition to EOF, the pb_decode implementation supports terminating a message with a 0 byte. This is compatible with the official Protocol Buffers because 0 is never a valid field tag.
+
+For optional fields, this function applies the default value and sets *has_<field>* to false if the field is not present.
+
+Because of memory concerns, the detection of missing required fields is not perfect if the structure contains more than 32 fields.
+
+.. sidebar:: Field decoders
+    
+    The functions with names beginning with *pb_dec_* are called field decoders. Each PB_LTYPE has its own field decoder, which handles translating from Protocol Buffers data to C data.
+
+    Each field decoder reads and decodes a single value. For arrays, the decoder is called repeatedly.
+
+    You can use the decoders from your callbacks.
+
+pb_dec_varint
+-------------
+Field decoder for PB_LTYPE_VARINT. ::
+
+    bool pb_dec_varint(pb_istream_t *stream, const pb_field_t *field, void *dest);
+
+:stream:        Input stream to read from. 1-10 bytes will be read.
+:field:         Field description structure. Only *field->data_size* matters.
+:dest:          Pointer to destination integer. Must have size of *field->data_size* bytes.
+:returns:       True on success, false on IO errors or if `pb_decode_varint`_ fails.
+
+This function first calls `pb_decode_varint`_. It then copies the first bytes of the 64-bit result value to *dest*, or on big endian architectures, the last bytes.
+
+pb_dec_svarint
+--------------
+Field decoder for PB_LTYPE_SVARINT. Similar to `pb_dec_varint`_, except that it performs zigzag-decoding on the value. ::
+
+    bool pb_dec_svarint(pb_istream_t *stream, const pb_field_t *field, void *dest);
+
+(parameters are the same as `pb_dec_varint`_)
+
+pb_dec_fixed
+------------
+Field decoder for PB_LTYPE_FIXED. ::
+
+    bool pb_dec_fixed(pb_istream_t *stream, const pb_field_t *field, void *dest);
+
+(parameters are the same as `pb_dec_varint`_)
+
+This function reads *field->data_size* bytes from the input stream.
+On big endian architectures, it then reverses the order of the bytes.
+Finally, it writes the bytes to *dest*.
+
+pb_dec_bytes
+------------
+Field decoder for PB_LTYPE_BYTES. Reads a length-prefixed block of bytes. ::
+
+    bool pb_dec_bytes(pb_istream_t *stream, const pb_field_t *field, void *dest);
+
+:stream:        Input stream to read from.
+:field:         Field description structure. Only *field->data_size* matters.
+:dest:          Pointer to a structure similar to pb_bytes_array_t.
+:returns:       True on success, false on IO error or if length exceeds the array size.
+
+This function expects a pointer to a structure with a *size_t* field at start, and a variable sized byte array after it. It will deduce the maximum size of the array from *field->data_size*.
+
+pb_dec_string
+-------------
+Field decoder for PB_LTYPE_STRING. Reads a length-prefixed string. ::
+
+    bool pb_dec_string(pb_istream_t *stream, const pb_field_t *field, void *dest);
+
+:stream:        Input stream to read from.
+:field:         Field description structure. Only *field->data_size* matters.
+:dest:          Pointer to a character array of size *field->data_size*.
+:returns:       True on success, false on IO error or if length exceeds the array size.
+
+This function null-terminates the string when successful. On error, the contents of the destination array are undefined.
+
+pb_dec_submessage
+-----------------
+Field decoder for PB_LTYPE_SUBMESSAGE. Calls `pb_decode`_ to perform the actual decoding. ::
+
+    bool pb_dec_submessage(pb_istream_t *stream, const pb_field_t *field, void *dest);
+
+:stream:        Input stream to read from.
+:field:         Field description structure. Only *field->ptr* matters.
+:dest:          Pointer to the destination structure.
+:returns:       True on success, false on IO error or if `pb_decode`_ fails.
+
+The *field->ptr* should be a pointer to *pb_field_t* array describing the submessage.
+
diff --git a/generator/nanopb_generator.py b/generator/nanopb_generator.py
index 6aff3155..e62d04f1 100644
--- a/generator/nanopb_generator.py
+++ b/generator/nanopb_generator.py
@@ -204,7 +204,7 @@ class Field:
         if prev_field_name is None:
             result += '    offsetof(%s, %s),' % (self.struct_name, self.name)
         else:
-            result += '    pb_delta(%s, %s, %s),' % (self.struct_name, self.name, prev_field_name)
+            result += '    pb_delta_end(%s, %s, %s),' % (self.struct_name, self.name, prev_field_name)
         
         if self.htype == 'PB_HTYPE_OPTIONAL':
             result += '\n    pb_delta(%s, has_%s, %s),' % (self.struct_name, self.name, self.name)
@@ -279,11 +279,11 @@ class Message:
         return result
 
     def fields_declaration(self):
-        result = 'extern const pb_field_t %s_fields[%d];' % (self.name, len(self.fields))
+        result = 'extern const pb_field_t %s_fields[%d];' % (self.name, len(self.fields) + 1)
         return result
 
     def fields_definition(self):
-        result = 'const pb_field_t %s_fields[%d] = {\n' % (self.name, len(self.fields))
+        result = 'const pb_field_t %s_fields[%d] = {\n' % (self.name, len(self.fields) + 1)
         
         prev = None
         for field in self.ordered_fields:
@@ -291,7 +291,7 @@ class Message:
             result += ',\n\n'
             prev = field.name
         
-        result = result[:-3] + '\n};'
+        result += '    PB_LAST_FIELD\n};'
         return result
 
 def iterate_messages(desc, names = Names()):
diff --git a/pb.h b/pb.h
index 1d32ee9b..e6037b81 100644
--- a/pb.h
+++ b/pb.h
@@ -1,6 +1,11 @@
 #ifndef _PB_H_
 #define _PB_H_
 
+/* pb.h: Common parts for nanopb library.
+ * Most of this is quite low-level stuff. For the high-level interface,
+ * see pb_encode.h or pb_decode.h
+ */
+
 #include <stdint.h>
 #include <stddef.h>
 #include <stdbool.h>
@@ -12,15 +17,7 @@
 #define pb_packed
 #endif
 
-/* Wire types. Library user needs these only in encoder callbacks. */
-typedef enum {
-    PB_WT_VARINT = 0,
-    PB_WT_64BIT  = 1,
-    PB_WT_STRING = 2,
-    PB_WT_32BIT  = 5
-} pb_wire_type_t;
-
-/* List of possible field types
+/* List of possible field types. These are used in the autogenerated code.
  * Least-significant 4 bits tell the scalar type
  * Most-significant 4 bits specify repeated/required/packed etc.
  * 
@@ -143,10 +140,19 @@ struct _pb_callback_t {
     void *arg;
 };
 
+/* Wire types. Library user needs these only in encoder callbacks. */
+typedef enum {
+    PB_WT_VARINT = 0,
+    PB_WT_64BIT  = 1,
+    PB_WT_STRING = 2,
+    PB_WT_32BIT  = 5
+} pb_wire_type_t;
+
 /* These macros are used to declare pb_field_t's in the constant array. */
 #define pb_membersize(st, m) (sizeof ((st*)0)->m)
 #define pb_arraysize(st, m) (pb_membersize(st, m) / pb_membersize(st, m[0]))
 #define pb_delta(st, m1, m2) ((int)offsetof(st, m1) - (int)offsetof(st, m2))
+#define pb_delta_end(st, m1, m2) (offsetof(st, m1) - offsetof(st, m2) - pb_membersize(st, m2))
 #define PB_LAST_FIELD {0,0,0,0}
 
 
diff --git a/pb_decode.c b/pb_decode.c
index e2888f22..379d134c 100644
--- a/pb_decode.c
+++ b/pb_decode.c
@@ -62,15 +62,15 @@ pb_istream_t pb_istream_from_buffer(uint8_t *buf, size_t bufsize)
  * Helper functions *
  ********************/
 
-bool pb_decode_varint32(pb_istream_t *stream, uint32_t *dest)
+static bool pb_decode_varint32(pb_istream_t *stream, uint32_t *dest)
 {
     uint64_t temp;
-    bool status = pb_decode_varint64(stream, &temp);
+    bool status = pb_decode_varint(stream, &temp);
     *dest = temp;
     return status;
 }
 
-bool pb_decode_varint64(pb_istream_t *stream, uint64_t *dest)
+bool pb_decode_varint(pb_istream_t *stream, uint64_t *dest)
 {
     uint8_t byte;
     int bitpos = 0;
@@ -108,7 +108,7 @@ bool pb_skip_string(pb_istream_t *stream)
     return pb_read(stream, NULL, length);
 }
 
-/* Currently all wire type related stuff is kept hidden from
+/* Currently the wire type related stuff is kept hidden from
  * callbacks. They shouldn't need it. It's better for performance
  * to just assume the correct type and fail safely on corrupt message.
  */
@@ -192,6 +192,7 @@ static void pb_field_init(pb_field_iterator_t *iter, const pb_field_t *fields, v
 static bool pb_field_next(pb_field_iterator_t *iter)
 {
     bool notwrapped = true;
+    size_t prev_size = iter->current->data_size * iter->current->array_size;
     iter->current++;
     iter->field_index++;
     if (iter->current->tag == 0)
@@ -199,10 +200,11 @@ static bool pb_field_next(pb_field_iterator_t *iter)
         iter->current = iter->start;
         iter->field_index = 0;
         iter->pData = iter->dest_struct;
+        prev_size = 0;
         notwrapped = false;
     }
     
-    iter->pData = (char*)iter->pData + iter->current->data_offset;
+    iter->pData = (char*)iter->pData + prev_size + iter->current->data_offset;
     iter->pSize = (char*)iter->pData + iter->current->size_offset;
     return notwrapped;
 }
@@ -224,7 +226,7 @@ static bool pb_field_find(pb_field_iterator_t *iter, int tag)
  * Decode a single field *
  *************************/
 
-bool decode_field(pb_istream_t *stream, int wire_type, pb_field_iterator_t *iter)
+static bool decode_field(pb_istream_t *stream, int wire_type, pb_field_iterator_t *iter)
 {
     pb_decoder_t func = PB_DECODERS[PB_LTYPE(iter->current->type)];
     
@@ -351,6 +353,9 @@ bool pb_decode(pb_istream_t *stream, const pb_field_t fields[], void *dest_struc
         if (!pb_decode_varint32(stream, &temp))
             return stream->bytes_left == 0; /* Was it EOF? */
         
+        if (temp == 0)
+            return true; /* Special feature: allow 0-terminated messages. */
+        
         tag = temp >> 3;
         wire_type = temp & 7;
         
@@ -399,7 +404,7 @@ static void endian_copy(void *dest, void *src, size_t destsize, size_t srcsize)
 bool pb_dec_varint(pb_istream_t *stream, const pb_field_t *field, void *dest)
 {
     uint64_t temp;
-    bool status = pb_decode_varint64(stream, &temp);
+    bool status = pb_decode_varint(stream, &temp);
     endian_copy(dest, &temp, field->data_size, sizeof(temp));
     return status;
 }
@@ -407,7 +412,7 @@ bool pb_dec_varint(pb_istream_t *stream, const pb_field_t *field, void *dest)
 bool pb_dec_svarint(pb_istream_t *stream, const pb_field_t *field, void *dest)
 {
     uint64_t temp;
-    bool status = pb_decode_varint64(stream, &temp);
+    bool status = pb_decode_varint(stream, &temp);
     temp = (temp >> 1) ^ -(int64_t)(temp & 1);
     endian_copy(dest, &temp, field->data_size, sizeof(temp));
     return status;
diff --git a/pb_decode.h b/pb_decode.h
index 011efdd9..2d4e5863 100644
--- a/pb_decode.h
+++ b/pb_decode.h
@@ -48,8 +48,7 @@ bool pb_decode(pb_istream_t *stream, const pb_field_t fields[], void *dest_struc
  * You may want to use these from your caller or callbacks.
  */
 
-bool pb_decode_varint32(pb_istream_t *stream, uint32_t *dest);
-bool pb_decode_varint64(pb_istream_t *stream, uint64_t *dest);
+bool pb_decode_varint(pb_istream_t *stream, uint64_t *dest);
 
 bool pb_skip_varint(pb_istream_t *stream);
 bool pb_skip_string(pb_istream_t *stream);
diff --git a/pb_encode.c b/pb_encode.c
index 19a531cf..188d768d 100644
--- a/pb_encode.c
+++ b/pb_encode.c
@@ -124,11 +124,13 @@ bool pb_encode(pb_ostream_t *stream, const pb_field_t fields[], const void *src_
     const pb_field_t *field = fields;
     const void *pData = src_struct;
     const void *pSize;
+    size_t prev_size = 0;
     
     while (field->tag != 0)
     {
-        pData = (const char*)pData + field->data_offset;
+        pData = (const char*)pData + prev_size + field->data_offset;
         pSize = (const char*)pData + field->size_offset;
+        prev_size = field->data_size * field->array_size;
         
         pb_encoder_t func = PB_ENCODERS[PB_LTYPE(field->type)];
         
diff --git a/tests/Makefile b/tests/Makefile
index 84c035e5..caed5bad 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -10,6 +10,10 @@ clean:
 %: %.c $(DEPS)
 	$(CC) $(CFLAGS) -o $@ $< ../pb_decode.c ../pb_encode.c
 
+person.h: person.proto
+	protoc -I. -I../generator -I/usr/include -operson.pb $<
+	python ../generator/nanopb_generator.py person.pb
+
 run_unittests: decode_unittests encode_unittests
 	./decode_unittests
 	./encode_unittests
diff --git a/tests/decode_unittests.c b/tests/decode_unittests.c
index ff4c6b46..ac51d491 100644
--- a/tests/decode_unittests.c
+++ b/tests/decode_unittests.c
@@ -50,32 +50,19 @@ int main()
     
     {
         pb_istream_t s;
-        uint32_t u;
-        int32_t i;
-        
-        COMMENT("Test pb_decode_varint32");
-        TEST((s = S("\x00"), pb_decode_varint32(&s, &u) && u == 0));
-        TEST((s = S("\x01"), pb_decode_varint32(&s, &u) && u == 1));
-        TEST((s = S("\xAC\x02"), pb_decode_varint32(&s, &u) && u == 300));
-        TEST((s = S("\xFF\xFF\xFF\xFF\x0F"), pb_decode_varint32(&s, &u) && u == UINT32_MAX));
-        TEST((s = S("\xFF\xFF\xFF\xFF\x0F"), pb_decode_varint32(&s, (uint32_t*)&i) && i == -1));
-    }
-    
-    {
-        pb_istream_t s;
         uint64_t u;
         int64_t i;
         
-        COMMENT("Test pb_decode_varint64");
-        TEST((s = S("\x00"), pb_decode_varint64(&s, &u) && u == 0));
-        TEST((s = S("\x01"), pb_decode_varint64(&s, &u) && u == 1));
-        TEST((s = S("\xAC\x02"), pb_decode_varint64(&s, &u) && u == 300));
-        TEST((s = S("\xFF\xFF\xFF\xFF\x0F"), pb_decode_varint64(&s, &u) && u == UINT32_MAX));
-        TEST((s = S("\xFF\xFF\xFF\xFF\x0F"), pb_decode_varint64(&s, (uint64_t*)&i) && i == UINT32_MAX));
+        COMMENT("Test pb_decode_varint");
+        TEST((s = S("\x00"), pb_decode_varint(&s, &u) && u == 0));
+        TEST((s = S("\x01"), pb_decode_varint(&s, &u) && u == 1));
+        TEST((s = S("\xAC\x02"), pb_decode_varint(&s, &u) && u == 300));
+        TEST((s = S("\xFF\xFF\xFF\xFF\x0F"), pb_decode_varint(&s, &u) && u == UINT32_MAX));
+        TEST((s = S("\xFF\xFF\xFF\xFF\x0F"), pb_decode_varint(&s, (uint64_t*)&i) && i == UINT32_MAX));
         TEST((s = S("\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x01"),
-              pb_decode_varint64(&s, (uint64_t*)&i) && i == -1));
+              pb_decode_varint(&s, (uint64_t*)&i) && i == -1));
         TEST((s = S("\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x01"),
-              pb_decode_varint64(&s, &u) && u == UINT64_MAX));
+              pb_decode_varint(&s, &u) && u == UINT64_MAX));
     }
     
     {