Commit | Line | Data |
---|---|---|
36457566 LC |
1 | #include "references.hh" |
2 | #include "hash.hh" | |
3 | #include "util.hh" | |
4 | #include "archive.hh" | |
5 | ||
6 | #include <map> | |
7 | #include <cstdlib> | |
8 | ||
9 | ||
10 | namespace nix { | |
11 | ||
12 | ||
13 | static unsigned int refLength = 32; /* characters */ | |
14 | ||
15 | ||
16 | static void search(const unsigned char * s, unsigned int len, | |
17 | StringSet & hashes, StringSet & seen) | |
18 | { | |
19 | static bool initialised = false; | |
20 | static bool isBase32[256]; | |
21 | if (!initialised) { | |
22 | for (unsigned int i = 0; i < 256; ++i) isBase32[i] = false; | |
23 | for (unsigned int i = 0; i < base32Chars.size(); ++i) | |
24 | isBase32[(unsigned char) base32Chars[i]] = true; | |
25 | initialised = true; | |
26 | } | |
27 | ||
28 | for (unsigned int i = 0; i + refLength <= len; ) { | |
29 | int j; | |
30 | bool match = true; | |
31 | for (j = refLength - 1; j >= 0; --j) | |
32 | if (!isBase32[(unsigned char) s[i + j]]) { | |
33 | i += j + 1; | |
34 | match = false; | |
35 | break; | |
36 | } | |
37 | if (!match) continue; | |
38 | string ref((const char *) s + i, refLength); | |
39 | if (hashes.find(ref) != hashes.end()) { | |
40 | debug(format("found reference to `%1%' at offset `%2%'") | |
41 | % ref % i); | |
42 | seen.insert(ref); | |
43 | hashes.erase(ref); | |
44 | } | |
45 | ++i; | |
46 | } | |
47 | } | |
48 | ||
49 | ||
50 | struct RefScanSink : Sink | |
51 | { | |
52 | HashSink hashSink; | |
53 | StringSet hashes; | |
54 | StringSet seen; | |
55 | ||
56 | string tail; | |
57 | ||
58 | RefScanSink() : hashSink(htSHA256) { } | |
59 | ||
60 | void operator () (const unsigned char * data, size_t len); | |
61 | }; | |
62 | ||
63 | ||
64 | void RefScanSink::operator () (const unsigned char * data, size_t len) | |
65 | { | |
66 | hashSink(data, len); | |
67 | ||
68 | /* It's possible that a reference spans the previous and current | |
69 | fragment, so search in the concatenation of the tail of the | |
70 | previous fragment and the start of the current fragment. */ | |
71 | string s = tail + string((const char *) data, len > refLength ? refLength : len); | |
72 | search((const unsigned char *) s.data(), s.size(), hashes, seen); | |
73 | ||
74 | search(data, len, hashes, seen); | |
75 | ||
76 | unsigned int tailLen = len <= refLength ? len : refLength; | |
77 | tail = | |
78 | string(tail, tail.size() < refLength - tailLen ? 0 : tail.size() - (refLength - tailLen)) + | |
79 | string((const char *) data + len - tailLen, tailLen); | |
80 | } | |
81 | ||
82 | ||
83 | PathSet scanForReferences(const string & path, | |
84 | const PathSet & refs, HashResult & hash) | |
85 | { | |
86 | RefScanSink sink; | |
87 | std::map<string, Path> backMap; | |
88 | ||
89 | /* For efficiency (and a higher hit rate), just search for the | |
90 | hash part of the file name. (This assumes that all references | |
91 | have the form `HASH-bla'). */ | |
92 | foreach (PathSet::const_iterator, i, refs) { | |
93 | string baseName = baseNameOf(*i); | |
94 | string::size_type pos = baseName.find('-'); | |
95 | if (pos == string::npos) | |
96 | throw Error(format("bad reference `%1%'") % *i); | |
97 | string s = string(baseName, 0, pos); | |
98 | assert(s.size() == refLength); | |
99 | assert(backMap.find(s) == backMap.end()); | |
100 | // parseHash(htSHA256, s); | |
101 | sink.hashes.insert(s); | |
102 | backMap[s] = *i; | |
103 | } | |
104 | ||
105 | /* Look for the hashes in the NAR dump of the path. */ | |
106 | dumpPath(path, sink); | |
107 | ||
108 | /* Map the hashes found back to their store paths. */ | |
109 | PathSet found; | |
110 | foreach (StringSet::iterator, i, sink.seen) { | |
111 | std::map<string, Path>::iterator j; | |
112 | if ((j = backMap.find(*i)) == backMap.end()) abort(); | |
113 | found.insert(j->second); | |
114 | } | |
115 | ||
116 | hash = sink.hashSink.finish(); | |
117 | ||
118 | return found; | |
119 | } | |
120 | ||
121 | ||
122 | } |