/prog/ - [C] explode() — Tinychan BBS

Name: Anonymous 2015-07-29 1:02

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>

struct v {
    void *p;
    size_t n;
};

struct v **explode(const void *p, const void *q, size_t pn, size_t qn, size_t size);
void *memmem(const void *p, const void *q, size_t pn, size_t qn, size_t size);
void *memdup(const void *p, size_t n);

int main(void) {
    struct v **p = explode("a b", " ", 3, 1, 1);
    struct v **q = p;
    if(p == NULL) {
        perror(NULL);
        return EXIT_FAILURE;
    }
    while(*p) {
        fwrite((*p)->p, 1, (*p)->n, stdout);
        putchar(' ');
        p++;
    }
    putchar('\n');
    p = q;
    while(*p != NULL) {
        free((*p)->p);
        free(*p++);
    }
    free(q);
    return 0;
}

struct v **explode(const void *p, const void *q, size_t pn, size_t qn, size_t size) {
    void *tmp;
    struct v **rv = NULL;
    size_t i;
    ptrdiff_t j;
    pn *= size;
    qn *= size;
    for(i = 0;; i++) {
        while(pn >= qn && memcmp(p, q, qn) == 0) {
            p = (char *)p + qn;
            pn -= qn;
        }
        if((tmp = realloc(rv, (i + 1) * sizeof *rv)) == NULL) {
            while(i--) {
                free(rv[i]->p);
                free(rv[i]);
            }
            free(rv);
            return NULL;
        }
        rv = tmp;
        rv[i] = NULL;
        if(pn == 0) break;
        if((rv[i] = malloc(sizeof **rv)) == NULL) {
            while(i--) {
                free(rv[i]->p);
                free(rv[i]);
            }
            free(rv);
            return NULL;
        }
        if((tmp = memmem(p, q, pn, qn, size)) == NULL) {
            if((rv[i]->p = memdup(p, pn)) == NULL) {
                while(i--) {
                    free(rv[i]->p);
                    free(rv[i]);
                }
                free(rv);
                return NULL;
            }
            rv[i]->n = pn;
            break;
        }
        else {
            j = (char *)tmp - (char *)p;
            if((tmp = memdup(p, j)) == NULL) {
                while(i--) {
                    free(rv[i]->p);
                    free(rv[i]);
                }
                free(rv);
                return NULL;
            }
            rv[i]->p = tmp;
            rv[i]->n = j;
            p = (char *)p + j;
            pn -= j;
        }
    }
    if(rv[i] != NULL) {
        if((tmp = realloc(rv, (i + 2) * sizeof *rv)) == NULL) {
            i++;
            while(i--) {
                free(rv[i]->p);
                free(rv[i]);
            }
            free(rv);
            return NULL;
        }
        rv[i + 1] = NULL;
    }
    return rv;
}

void *memdup(const void *p, size_t n) {
    void *rv = NULL;
    if((rv = malloc(n)) != NULL) memcpy(rv, p, n);
    return rv;
}

void *memmem(const void *p, const void *q, size_t pn, size_t qn, size_t size) {
    for(; pn >= qn; p = (unsigned char *)p + 1, pn -= size)
    if(memcmp(p, q, qn) == 0) return p;
    return NULL;
}

Name: Anonymous 2015-07-29 1:15

Here's another,

// Search for string in file
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>

int main(int argc, char **argv) {
  FILE *fp = stdin;
  const char *search;
  char *p, *s;
  int c;
  size_t i, j;
  ptrdiff_t k;
  if(argc < 2) abort();
  search = argv[1];
  if(argc > 2 && (fp = fopen(argv[2], "rb")) == NULL) abort();
  i = j = strlen(search);
  if((p = s = malloc(j)) == NULL) abort();
  while(j-- && (c = getc(fp)) != EOF) *s++ = c;
  while(c != EOF) {
	  k = s - p;
	  if(k == i) s = p, k = 0;
	  if(memcmp(s, search, i - k) == 0)
		if(k == 0) break;
		else if(memcmp(p, &search[i - k], k) == 0) break;
	  if((c = getc(fp)) != EOF) *s++ = c;
  }
  free(p);
  if(c == EOF)
	if(feof(fp)) printf("'%s' not found!\n", search);
	else abort();
  else printf("'%s' found in offset %#lx\n", search, (unsigned long)(ftell(fp) - i));
  fclose(fp);
  return 0;
}

Name: Anonymous 2015-07-29 1:19

Lastly,

// uniq("abdca") ==> "abcd"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <limits.h>

#define BITSET(a, b) ((a)[BITSLOT(b)] |= BITMASK(b))
#define BITTEST(a, b) ((a)[BITSLOT(b)] & BITMASK(b))
#define BITNSLOTS(nb) ((nb + CHAR_BIT - 1) / CHAR_BIT)
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#define MAX(x, y) (((x) > (y)) ? (x) : (y))

void uniq(char *p) {
    char m[BITNSLOTS(UCHAR_MAX)] = {0}, *q;
    size_t i;
    unsigned char min, max;
    for(min = UCHAR_MAX, max = 0, q = p; *q != 0; q++) {
        min = MIN(min, (unsigned char)*q);
        max = MAX(max, (unsigned char)*q);
        BITSET(m, (unsigned char)*q);
    }
    for(i = min, q = p; i < max; i++)
    if(BITTEST(m, i)) *q++ = i;
    if(max) *q++ = max;
    *q = 0;
}

int main(int argc, char **argv) {
    if(argc == 2) {
        uniq(argv[1]);
        puts(argv[1]);
    }
    return 0;
}

Name: Anonymous 2015-07-29 1:25

I'm intrigued. What does this do?

Name: Anonymous 2015-07-29 1:34

>>4

Well, >>1 just copies the behavior of PHP explode() <http://php.net/manual/en/function.explode.php>, although it works on chunks of bytes, so you can explode, for example, an array of integers.

Now, >>2 simply searches for a string in a file and reports back as soon as it is found. The algorithm is O(nm), with a cellular matrix implemented in place. This is written with memory restrictions in mind.

The last code, >>3 is a unique sort of a character array. It modifies the original array and uses a bit array (again, memory restrictions were assumed).

Name: Anonymous 2015-07-29 1:36

>>5
Did I say "cellular"? I meant circular.

Name: Anonymous 2015-07-29 2:33

>>5
What a funny name for split.

Name: Anonymous 2015-07-29 9:46

>>7

split

What a funny name for strtok.

Name: Anonymous 2015-07-29 10:25

>>8
What's a "strtok"? Is it in Klingon?

Name: Anonymous 2015-07-29 10:28

You forgot something
#include "void.h"

Name: Cudder !cXCudderUE 2015-07-29 12:46

realloc(rv, (i + 2) * sizeof *rv)

Congratulations retard, you just made a quadratic-time algorithm.

That explode is disgusting.

Name: Anonymous 2015-07-29 17:09

>>11
Those are some explosive dubs.

Name: Anonymous 2015-07-29 19:09

>>11
I don't think so. That line is out of any loop. A simple call to realloc doesn't make anything quadratic.

Name: Anonymous 2015-07-29 19:38

>>13
[i][b][o]IGNORE CUDDER POSTERS
DO NOT REPLY TO CUDDER POSTS
HIDE CUDDER THREADS[/i][/b][[/o]

Name: Anonymous 2015-07-29 19:51

>>13
for(i = 0;; i++) {
It's quadratic in the number of fragments.
i is the number of rv blocks, and it tries to grow the array of rv pointers by only 1 each time, realloc() potentially has to move and copy the memory if it can't expand the current block.

Even if you fix it to a geometric progression, it's still way too many allocations and deallocations, should run through p once and record all the offsets of where it should be split into an index array, then allocate entire rv once and then copy the fragments there using the index array.
Two passes through p, but I reckon it's way better than allocating and reallocing all the time.

Name: Anonymous 2015-07-29 20:12

>>15
You're right. So the question is, which is better? Two passes through p or n reallocations? It really depends on the problem, doesn't it? Big strings with mostly one occurrence come to mind. I'm not trying to say my code is the best, but that it has its place.

Most of the time people criticise code in the wild about how it could be done better are missing this.

Name: Anonymous 2015-07-29 20:53

>>16

Two passes through p or n reallocations?

False dichotomy. You can do it in log(n) reallocations by doubling the size of rv each time, with a final realloc to trim off the excess allocated space (which won't relocate it in memory). In fact, I think this is the way most resizeable containers are implemented.

Name: >>17 2015-07-29 20:54

"Each time" referring to each time you hit the end of your currently allocated block, not each time the loop iterates.

Name: Anonymous 2015-07-29 21:01

>>17,18
What about memory? Sure, that's another solution (notice logn and n are equivalent for small n).

I was not taking every case, just mine and the one mentioned.

Name: Anonymous 2015-07-29 21:15

>>19

What about memory?

It never allocates more than twice what it needs, which I think is a reasonable overhead when you take into account the difference in the number of reallocations required.

notice logn and n are equivalent for small n

If you start by allocating ~16 entries or so, small n will never need to realloc to a larger size. Of course, this contradicts what I just said above (for small n), but I don't think a very-short-lived block of 16 pointers is going to run anyone out of memory. In my opinion this is the optimal point in the speed/memory tradeoff.

Name: Anonymous 2015-07-29 22:10

>>20
You can also take into account the fact realloc is probably doing that itself. In implementations where realloc doesn't do this by itself you are left wondering if your code should (why would you know better than the libc implementers?).

Name: Anonymous 2015-07-29 22:12

>>21
dubs

Name: Anonymous 2015-07-29 22:20

>>21
You're right. Noobs don't realize realloc can allocate more than requested and return the same pointer for anything up to that size.

Name: Anonymous 2015-07-30 12:27

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>
#include <stdbool.h>

char **explode(const char *src, const char *delim) {
    size_t src_len = strlen(src),
           delim_len = strlen(delim);
    ptrdiff_t delim_diff = delim_len - 1;
    char *psrc = NULL,
         *psrc2 = NULL,
         *pdelim = NULL,
         **ret = NULL,
         *buf = malloc(src_len),
         *start = NULL;
    int found_cnt = 0,
        k = 0,
        m = 0;
    bool in_flag = false;

    for(psrc = src; *psrc != NULL; psrc++) {
        if(*psrc == *delim) {
            if(!strncmp(psrc, delim, delim_len - 1)) {
                found_cnt++;
            }
        }
    }

    ret = malloc((found_cnt + 3) * sizeof(char *));

    for(psrc = src; *psrc != NULL; psrc++) {
        if(!start && *psrc == *delim) {
            if(!strncmp(psrc, delim, delim_diff)) {
                start = psrc;
                continue;
            }
        } else if(start && (psrc - start) <= delim_diff) {
            continue;
        } else if(start && (psrc - start) > delim_diff) {
            buf[k++] = NULL;
            k = 0;
            ret[m++] = malloc(strlen(buf));
            memcpy(ret[m - 1], buf, strlen(buf));
            start = NULL;
        }
        buf[k++] = *psrc;
    }
    buf[k++] = NULL;
    ret[m++] = malloc(strlen(buf));
    memcpy(ret[m - 1], buf, strlen(buf));
    free(buf);

    ret[m++] = NULL;

    return ret;
}

int main(int argc, char **argv) {
    if(argc < 3) {
        printf("Usage:\n"
                "\tListen, nigger: \n\t\t$ ./explode \"my series of tokens\" \" \"\n"
                "\t\t$ ./explode \"my...series...of...tokens\" \"...\"\n");
        return 0;
    }
    char **toks = explode(argv[1], argv[2]);
    for(char **ptoks = toks; *ptoks != NULL; ptoks++) {
        printf("Token: %s\n", *ptoks);
        free(*ptoks);
    }
    free(toks);
    return 0;
}

10KB of lorem ipsum tokenized by spaces:
real 0m0.020s
user 0m0.007s
sys 0m0.006s

5KB of lorem ipsum tokenize by spaces using OP's program
real 0m0.630s
user 0m0.052s
sys 0m0.009s

Name: Cudder !cXCudderUE 2015-07-30 14:37

>>16

; in:
;  esi = source
;  edi = delim
;  edx = source length
;  ecx = delim length
; out:
;  eax = ptr to splits, {ptr,len}, null-terminated, 0 on err
;  all registers change, except esp and ebx
explode:
 mov ebp, esp
 jmp begin_loop
exploop:
 push esi
 push ecx
 push edi
 repnz cmpsb
 pop edi
 pop ecx
 jz match
 pop esi
 inc esi
 dec edx
 jmp loop_cmp
match:
 pop eax
 sub eax, [esp]
 push eax
 sub edx, ecx
begin_loop:
 push esi
loop_cmp:
 cmp ecx, edx
 jbe exploop
expdone:
 push edx
 push 0
 sub ebp, esp
 push ebp
 shr ebp, 2
 call malloc
 test eax, eax
 pop ecx
 jz malerr
 mov ecx, ebp ; not needed if malloc does not modify arg
retloop:
 pop [eax+ecx*4-4]
 loop retloop
malerr:
 ret

One pass through p, and one "reallocation" (try doing that in C!)

Name: Cudder !cXCudderUE 2015-07-30 14:38

s/repnz/repz/

Name: Anonymous 2015-07-30 15:22

I want to swallow Cudder's cum. That way I'd acquire her mad Assembly superpowers.

Name: Anonymous 2015-07-31 1:11

>>25
But edx has source length. That's edx = strlen(str);, which is the second hidden pass.

Name: Cudder !cXCudderUE 2015-07-31 1:46

>>28
You're assuming the strings are null-terminated and not explicitly length-delimited. Like the OP's code, >>25 works for arbitrary data and not only null-terminated strings.

Name: Anonymous 2015-07-31 4:34

assume my anus

Name: Anonymous 2015-07-31 5:05

assume this: a pack of wild niggers

Name: Anonymous 2015-07-31 5:15

>>25
Less code, quite easy to understand and very efficient.
Why do we use C again?

Name: Anonymous 2015-07-31 5:55

>>32
It's normally a waste of programmer time to require the mental overhead of writing the whole system this way. It's normally more productive to have the programmers to complete the system in a high paradigm and then focus on the provably time critical parts i.e we've actually profiled the run times of our code. We should forget about small efficiencies, say about 97% of the time: premature optimization is the root of all evil. Yet we should not pass up our opportunities in that critical 3%.

Name: Cudder !cXCudderUE 2015-07-31 10:46

>>33
That mentality creates systems which are slow and resource-consuming everywhere. The 3% gets spread out over the whole thing and it's hard to see what needs to be optimised because everything is as slow as everything else and nothing stands out anymore.

Name: Anonymous 2015-07-31 11:03

>>34
It's a trade-off. We have limited energy and a finite lifespan.

Name: Anonymous 2015-07-31 14:50

>>35
Well excuse me but making your code run efficiently seems just a little more important than worrying about your mortality.

Name: Anonymous 2015-07-31 15:19

>>34

That mentality creates systems which are slow and resource-consuming everywhere.

I think this problem is not significant for the case of commodity high performance computing of modern personal computers. If something is irritatingly slow, it's often not too difficult to refactor the experience so it feel less irritating. There are certainly cases of computing that require tight resource accounting like in the case of embedded programming or battery powered computing; the vast majority of computing doesn't mandate such strict accountability as most computing is done on high performance machines.

Name: Anonymous 2015-07-31 19:00

>>37
Actually, most computing is done on low-resource embedded devices:

If you round off the fractions, embedded systems consume 100% of the worldwide production of microprocessors.

http://www.dc.uba.ar/materias/disfpga/2011/c2/descargas/Embedded.doc

Name: Anonymous 2015-07-31 21:35

>>38

What should Firefox do with this file?

Name: Anonymous 2015-07-31 22:11

>>39
insert it inside

[C] explode()

1 Name: Anonymous 2015-07-29 1:02

2 Name: Anonymous 2015-07-29 1:15

3 Name: Anonymous 2015-07-29 1:19

4 Name: Anonymous 2015-07-29 1:25

5 Name: Anonymous 2015-07-29 1:34

6 Name: Anonymous 2015-07-29 1:36

7 Name: Anonymous 2015-07-29 2:33

8 Name: Anonymous 2015-07-29 9:46

9 Name: Anonymous 2015-07-29 10:25

10 Name: Anonymous 2015-07-29 10:28

11 Name: Cudder !cXCudderUE 2015-07-29 12:46

12 Name: Anonymous 2015-07-29 17:09

13 Name: Anonymous 2015-07-29 19:09

14 Name: Anonymous 2015-07-29 19:38

15 Name: Anonymous 2015-07-29 19:51

16 Name: Anonymous 2015-07-29 20:12

17 Name: Anonymous 2015-07-29 20:53

18 Name: >>17 2015-07-29 20:54

19 Name: Anonymous 2015-07-29 21:01

20 Name: Anonymous 2015-07-29 21:15

21 Name: Anonymous 2015-07-29 22:10

22 Name: Anonymous 2015-07-29 22:12

23 Name: Anonymous 2015-07-29 22:20

24 Name: Anonymous 2015-07-30 12:27

25 Name: Cudder !cXCudderUE 2015-07-30 14:37

26 Name: Cudder !cXCudderUE 2015-07-30 14:38

27 Name: Anonymous 2015-07-30 15:22

28 Name: Anonymous 2015-07-31 1:11

29 Name: Cudder !cXCudderUE 2015-07-31 1:46

30 Name: Anonymous 2015-07-31 4:34

31 Name: Anonymous 2015-07-31 5:05

32 Name: Anonymous 2015-07-31 5:15

33 Name: Anonymous 2015-07-31 5:55

34 Name: Cudder !cXCudderUE 2015-07-31 10:46

35 Name: Anonymous 2015-07-31 11:03

36 Name: Anonymous 2015-07-31 14:50

37 Name: Anonymous 2015-07-31 15:19

38 Name: Anonymous 2015-07-31 19:00

39 Name: Anonymous 2015-07-31 21:35

40 Name: Anonymous 2015-07-31 22:11