Skip to content

Commit

Permalink
Merge pull request #54 from dryman/OPIC-2
Browse files Browse the repository at this point in the history
OPIC-2 updates README
  • Loading branch information
dryman authored Apr 30, 2017
2 parents 0f6689c + 2e6d468 commit 7e388fd
Show file tree
Hide file tree
Showing 19 changed files with 774 additions and 467 deletions.
3 changes: 0 additions & 3 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,3 +0,0 @@
[submodule "doxygen-bootstrapped"]
path = doxygen-bootstrapped
url = https://github.com/dryman/doxygen-bootstrapped.git
40 changes: 20 additions & 20 deletions DoxyFile
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ CREATE_SUBDIRS = NO
# U+3044.
# The default value is: NO.

ALLOW_UNICODE_NAMES = NO
ALLOW_UNICODE_NAMES = YES

# The OUTPUT_LANGUAGE tag is used to specify the language in which all
# documentation generated by doxygen is written. Doxygen will use this
Expand Down Expand Up @@ -118,7 +118,7 @@ REPEAT_BRIEF = YES
# the entity):The $name class, The $name widget, The $name file, is, provides,
# specifies, contains, represents, a, an and the.

ABBREVIATE_BRIEF =
ABBREVIATE_BRIEF = YES

# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
# doxygen will generate a detailed section even if there is only a brief
Expand Down Expand Up @@ -177,7 +177,7 @@ SHORT_NAMES = NO
# description.)
# The default value is: NO.

JAVADOC_AUTOBRIEF = YES
JAVADOC_AUTOBRIEF = NO

# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first
# line (until the first dot) of a Qt-style comment as the brief description. If
Expand All @@ -203,7 +203,7 @@ MULTILINE_CPP_IS_BRIEF = NO
# documentation from any documented member that it re-implements.
# The default value is: YES.

INHERIT_DOCS = YES
INHERIT_DOCS = NO

# If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new
# page for each member. If set to NO, the documentation of a member will be part
Expand Down Expand Up @@ -242,7 +242,7 @@ TCL_SUBST =
# members will be omitted, etc.
# The default value is: NO.

OPTIMIZE_OUTPUT_FOR_C = NO
OPTIMIZE_OUTPUT_FOR_C = YES

# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or
# Python sources only. Doxygen will then generate output that is more tailored
Expand Down Expand Up @@ -350,7 +350,7 @@ DISTRIBUTE_GROUP_DOC = NO
# \nosubgrouping command.
# The default value is: YES.

SUBGROUPING = YES
SUBGROUPING = NO

# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions
# are shown inside the group in which they are included (e.g. using \ingroup)
Expand Down Expand Up @@ -427,15 +427,15 @@ EXTRACT_PACKAGE = NO
# included in the documentation.
# The default value is: NO.

EXTRACT_STATIC = NO
EXTRACT_STATIC = YES

# If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined
# locally in source files will be included in the documentation. If set to NO,
# only classes defined in header files are included. Does not have any effect
# for Java sources.
# The default value is: YES.

EXTRACT_LOCAL_CLASSES = YES
EXTRACT_LOCAL_CLASSES = NO

# This flag is only useful for Objective-C code. If set to YES, local methods,
# which are defined in the implementation section but not in the interface are
Expand Down Expand Up @@ -482,7 +482,7 @@ HIDE_FRIEND_COMPOUNDS = NO
# blocks will be appended to the function's detailed documentation block.
# The default value is: NO.

HIDE_IN_BODY_DOCS = NO
HIDE_IN_BODY_DOCS = YES

# The INTERNAL_DOCS tag determines if documentation that is typed after a
# \internal command is included. If the tag is set to NO then the documentation
Expand Down Expand Up @@ -518,14 +518,14 @@ HIDE_COMPOUND_REFERENCE= NO
# the files that are included by a file in the documentation of that file.
# The default value is: YES.

SHOW_INCLUDE_FILES = YES
SHOW_INCLUDE_FILES = NO

# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each
# grouped member an include statement to the documentation, telling the reader
# which file to include in order to use the member.
# The default value is: NO.

SHOW_GROUPED_MEMB_INC = NO
SHOW_GROUPED_MEMB_INC = YES

# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include
# files with double quotes in the documentation rather than with sharp brackets.
Expand All @@ -544,7 +544,7 @@ INLINE_INFO = YES
# name. If set to NO, the members will appear in declaration order.
# The default value is: YES.

SORT_MEMBER_DOCS = YES
SORT_MEMBER_DOCS = NO

# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief
# descriptions of file, namespace and class members alphabetically by member
Expand Down Expand Up @@ -597,7 +597,7 @@ STRICT_PROTO_MATCHING = NO
# list. This list is created by putting \todo commands in the documentation.
# The default value is: YES.

GENERATE_TODOLIST = YES
GENERATE_TODOLIST = NO

# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test
# list. This list is created by putting \test commands in the documentation.
Expand Down Expand Up @@ -758,7 +758,7 @@ WARN_LOGFILE =
# spaces.
# Note: If this tag is empty the current directory is searched.

INPUT = doc src
INPUT = . opic opic/common opic/hash

# This tag can be used to specify the character encoding of the source files
# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
Expand Down Expand Up @@ -894,7 +894,7 @@ FILTER_SOURCE_PATTERNS =
# (index.html). This can be useful if you have a project on for instance GitHub
# and want to reuse the introduction page also for the doxygen output.

USE_MDFILE_AS_MAINPAGE =
USE_MDFILE_AS_MAINPAGE = README.md

#---------------------------------------------------------------------------
# Configuration options related to source browsing
Expand All @@ -907,7 +907,7 @@ USE_MDFILE_AS_MAINPAGE =
# also VERBATIM_HEADERS is set to NO.
# The default value is: NO.

SOURCE_BROWSER = NO
SOURCE_BROWSER = YES

# Setting the INLINE_SOURCES tag to YES will include the body of functions,
# classes and enums directly into the documentation.
Expand Down Expand Up @@ -1050,7 +1050,7 @@ HTML_FILE_EXTENSION = .html
# of the possible markers and block names see the documentation.
# This tag requires that the tag GENERATE_HTML is set to YES.

HTML_HEADER = doxygen-bootstrapped/example-site/header.html
HTML_HEADER =

# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each
# generated HTML page. If the tag is left blank doxygen will generate a standard
Expand All @@ -1060,7 +1060,7 @@ HTML_HEADER = doxygen-bootstrapped/example-site/header.html
# that doxygen normally uses.
# This tag requires that the tag GENERATE_HTML is set to YES.

HTML_FOOTER = doxygen-bootstrapped/example-site/footer.html
HTML_FOOTER =

# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style
# sheet that is used by each HTML page. It can be used to fine-tune the look of
Expand All @@ -1085,7 +1085,7 @@ HTML_STYLESHEET =
# list). For an example see the documentation.
# This tag requires that the tag GENERATE_HTML is set to YES.

HTML_EXTRA_STYLESHEET = doxygen-bootstrapped/customdoxygen.css
HTML_EXTRA_STYLESHEET =

# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or
# other source files which should be copied to the HTML output directory. Note
Expand All @@ -1095,7 +1095,7 @@ HTML_EXTRA_STYLESHEET = doxygen-bootstrapped/customdoxygen.css
# files will be copied as-is; there are no commands or markers available.
# This tag requires that the tag GENERATE_HTML is set to YES.

HTML_EXTRA_FILES = doxygen-bootstrapped/doxy-boot.js
HTML_EXTRA_FILES =

# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen
# will adjust the colors in the style sheet and background images according to
Expand Down
7 changes: 6 additions & 1 deletion Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,9 @@ SUBDIRS = opic benchmark

nobase_include_HEADERS = \
opic/op_malloc.h \
opic/demomalloc.h
opic/common/op_assert.h \
opic/common/op_atomic.h \
opic/common/op_macros.h \
opic/common/op_log.h \
opic/hash/op_hash.h \
opic/hash/robin_hood.h
133 changes: 104 additions & 29 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,47 +1,123 @@
Object Persistence In C (Alpha)
Object Persistence In C (Beta)
===================================

[![Build Status](https://travis-ci.org/dryman/opic.svg?branch=master)](https://travis-ci.org/dryman/opic)

OPIC is a new approach to serialize general data structures, object types, and
primitive types. It's a new data stack from the ground up -- from object
oriented model, generics, to memory management. With everything redesigned for
scalability, we visioned a new distributed computing ecosystem for the 21st
century.
OPIC is a revolutionary serialization framework for C. Unlike traditional
approaches which walk through the in-memory objects and write them to disk, OPIC
itself is a memory allocator where all the objects created with it have the same
representation in memory and on disk. "Serializing/deserializing" is extremely
cheap with OPIC because it only requires memory dump and mmap syscalls.

OPIC is suitable for building database indexes, key-value stores, or even search
engines. At the time of writing we provide a POC hash table to demonstrate how
easy it is to build an embedded key-value store engine.

TODO: link to post or tutorial for the hash table.

SYNOPSIS
--------

```c
#include "opic/op_malloc.h"

/*
 * Round-trip example: build two linked objects on an OPIC heap, write the
 * heap to `filename`, then read it back into a second heap and recover the
 * same objects.
 *
 * `struct S1` and `struct S2` are user-defined types that are not shown
 * here; `struct S1` is assumed to carry an `opref_t s2_ref` member.
 */
void simple_object_database(char* filename)
{
  /* Both variables must be pointers: `OPHeap* heap1, heap2;` would declare
     heap2 as a plain OPHeap object rather than an OPHeap pointer. */
  OPHeap *heap1, *heap2;
  FILE* fd;

  OPHeapNew(&heap1);

  struct S1* s1 = (struct S1*)OPMallocRaw(heap1, sizeof(struct S1));
  struct S2* s2 = (struct S2*)OPMallocRaw(heap1, sizeof(struct S2));

  // object relationships in OPIC must convert to opref_t
  s1->s2_ref = OPPtr2Ref(s2);

  // opref_t can convert back to pointer via OPRef2Ptr
  struct S2* s2_ptr = OPRef2Ptr(heap1, s1->s2_ref);

  // Serialize the heap to a file.  s1 is stored in slot 0 so that it can be
  // looked up again after the heap is read back.
  OPHeapStorePtr(heap1, s1, 0);
  fd = fopen(filename, "w");
  OPHeapWrite(heap1, fd);
  fclose(fd);
  OPHeapDestroy(heap1);

  // Deserialize the heap and restore the objects
  fd = fopen(filename, "r");
  OPHeapRead(&heap2, fd);
  fclose(fd);

  // Slot 0 yields s1 again; s2 is reached through the stored reference.
  s1 = (struct S1*)OPHeapRestorePtr(heap2, 0);
  s2 = s2_ptr = OPRef2Ptr(heap2, s1->s2_ref);
  OPHeapDestroy(heap2);
}
```
DEPENDENCY
----------
* C compiler with support of C11 atomics.
- gcc 4.9, gcc 5, gcc 6
- TODO: figure out which versions of clang support C11 atomics.
* [log4c (>= 1.2.4)](http://log4c.sourceforge.net)
- 1.2.1 probably also works, but 1.2.4 has been available since 2008, so
getting it on most distros shouldn't be hard.
* [cmocka (>= 1.0.1)](https://cmocka.org)
- Required for unit testing.
* GNU Autotools for people who want to build from head
- autoconf
- automake
- libtool
INSTALL
-------
```bash
# For people who download the tarball release
./configure; make; make install
# For people who clone from github
./bootstrap.sh
./configure; make; make install
```

Our key to success is to extend the memory oriented programming model to other
tier of storages: disk, SSD, network, or even tape. Accessing data should not be
limited by memory address on single machine; instead, all data types should be
serializable so that it can be shared across network or save for future use.
Having serialzability as the core design brings many benefits for free:
Users who run OPIC on Linux need to disable overcommit accounting. This is
because OPIC pre-allocates a large amount of memory in the 64-bit address space.

* Scalability. When all the data types are serializable, the application can
easily scale up with multiple strategies, including sharing data between nodes,
off load certain data for later use, or build caches from the same serialization
abstraction.
```bash
sudo sysctl vm.overcommit_memory=1
```

* Data persistence. Think of databases' durability. Since all the data types are
serializable, you can save the snapshot of the entire program state whenever you
want. This can make database applications way easier to build, and also make
debugging easier.
DATA STRUCTURES INCLUDED
------------------------

* Generality for data processing applications. All the data processing systems,
including RDBMS, map reduce, search engines, even RPC involves are some sort of
special case of serialization. Why duplicates the effort for each of those
applications instead of having one powerful and optimized framework?
* RobinHoodHashing, can be used as
- HashMap
- HashSet
- HashMultimap
- TODO: document benchmark results

Most of the functionalities will be implemented in C. Our end goal is to embed
this framework into other higher level languages like python, java, nodejs for
wider adoption.
* TODO (like a wishlist):
- Integer DS which support predecessor queries
- Tries
- Succinct data structures
- Integer compression (for columnar store)

To begin with, please check out our [documentation page][doc].
DOCUMENTATION
-------------

* [API documentation][doc]
* TODO: tutorial
* TODO: advanced examples which explain how it works

[doc]: http://dryman.github.com/opic/

LICENSE
-------

Copyright (c) 2016 Felix Chern
Copyright (c) 2016, 2017 Felix Chern

OPIC is free software: you can redistribute it and/or modify it under the
terms of the GNU Lesser General Public License as published by the Free
Expand All @@ -66,4 +142,3 @@ writing we still have some portion of the code hasn't yet convert to GNU
formatting styles, but this should be fixed in near future.

[gnuc]: https://www.gnu.org/prep/standards/standards.html

2 changes: 1 addition & 1 deletion benchmark/malloc_bench.c
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ run_alloc_benchmark(int loops, size_t blk_min, size_t blk_max,
}

/* Insert the newly alloced block into the array at a random point. */
blk_array[next_idx] = OPMallocRaw(heap, blk_size);
blk_array[next_idx] = OPMalloc(heap, blk_size);
#ifdef DEBUG
printf("got addr %p\n", blk_array[next_idx]);
#endif
Expand Down
2 changes: 1 addition & 1 deletion configure.ac
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
AC_INIT([OPIC],[0.3])
AC_INIT([OPIC],[0.4.0])

AC_CONFIG_SRCDIR([README.md])
AC_CONFIG_AUX_DIR([build-aux])
Expand Down
1 change: 0 additions & 1 deletion doxygen-bootstrapped
Submodule doxygen-bootstrapped deleted from e84e5d
1 change: 0 additions & 1 deletion opic/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ lib_LTLIBRARIES = libopic.la


libopic_la_SOURCES = \
demomalloc.c \
common/op_log.c \
malloc/op_malloc.c \
malloc/allocator.c \
Expand Down
Loading

0 comments on commit 7e388fd

Please sign in to comment.