LLVM OpenMP* Runtime Library
extractExternal.cpp
1 /*
2  * extractExternal.cpp
3  */
4 
5 
6 //===----------------------------------------------------------------------===//
7 //
8 // The LLVM Compiler Infrastructure
9 //
10 // This file is dual licensed under the MIT and the University of Illinois Open
11 // Source Licenses. See LICENSE.txt for details.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 
#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <string.h>
#include <strstream>
23 
24 /* Given a set of n object files h ('external' object files) and a set of m
25  object files o ('internal' object files),
26  1. Determines r, the subset of h that o depends on, directly or indirectly
27  2. Removes the files in h - r from the file system
28  3. For each external symbol defined in some file in r, rename it in r U o
29  by prefixing it with "__kmp_external_"
30  Usage:
31  hide.exe <n> <filenames for h> <filenames for o>
32 
33  Thus, the prefixed symbols become hidden in the sense that they now have a
34  special prefix.
35 */
36 
37 using namespace std;
38 
// Prints <errorMsg> followed by a newline to standard output and terminates
// the process with exit status 1; this function never returns.
// The message is taken as a pointer to const so that string literals can be
// passed directly (a literal does not convert to 'char *' since C++11).
void stop(const char *errorMsg) {
  printf("%s\n", errorMsg);
  exit(1);
}
43 
// An entry in the symbol table of a .OBJ file.
// The leading 18 bytes of this object are filled verbatim from the file and
// written back verbatim, so the order and the sizes of the fields must match
// the on-disk record; do not reorder them or insert new fields.
class Symbol {
public:
  // 8 bytes: the symbol name, either stored inline or as a reference into the
  // string table. 'long long' is the standard spelling of the 64-bit integer
  // type that the Microsoft compiler also calls __int64.
  long long name;
  unsigned value;
  // zero when the symbol is not defined in this object file
  unsigned short sectionNum;
  unsigned short type;
  // 2 marks an external symbol
  char storageClass;
  // Number of auxiliary 18-byte entries that follow this entry. Kept unsigned
  // so that counts of 128 and above are not seen as negative numbers by the
  // loops that skip or copy the auxiliary entries.
  unsigned char nAux;
};
52 
// Input stream over a character buffer that the stream owns: the buffer handed
// to the constructor is released with delete[] when the stream is destroyed.
// Constructor and destructor are protected, so the class can only be used as
// a base class.
class _rstream : public std::istrstream {
protected:
  // <source> carries the address of the buffer and its size in bytes
  _rstream(std::pair<const char *, std::streamsize> source)
      : std::istrstream(source.first, source.second), buf(source.first) {}

  ~_rstream() {
    // the buffer is owned by this object
    delete[] buf;
  }

private:
  // start of the owned buffer
  const char *buf;
};
62 
63 // A stream encapuslating the content of a file or the content of a string,
64 // overriding the >> operator to read various integer types in binary form,
65 // as well as a symbol table entry.
66 class rstream : public _rstream {
67 private:
68  template <class T> inline rstream &doRead(T &x) {
69  read((char *)&x, sizeof(T));
70  return *this;
71  }
72  static pair<const char *, streamsize> getBuf(const char *fileName) {
73  ifstream raw(fileName, ios::binary | ios::in);
74  if (!raw.is_open())
75  stop("rstream.getBuf: Error opening file");
76  raw.seekg(0, ios::end);
77  streampos fileSize = raw.tellg();
78  if (fileSize < 0)
79  stop("rstream.getBuf: Error reading file");
80  char *buf = new char[fileSize];
81  raw.seekg(0, ios::beg);
82  raw.read(buf, fileSize);
83  return pair<const char *, streamsize>(buf, fileSize);
84  }
85 
86 public:
87  // construct from a string
88  rstream(const char *buf, streamsize size)
89  : _rstream(pair<const char *, streamsize>(buf, size)) {}
90  // construct from a file whole content is fully read once to initialize the
91  // content of this stream
92  rstream(const char *fileName) : _rstream(getBuf(fileName)) {}
93  rstream &operator>>(int &x) { return doRead(x); }
94  rstream &operator>>(unsigned &x) { return doRead(x); }
95  rstream &operator>>(short &x) { return doRead(x); }
96  rstream &operator>>(unsigned short &x) { return doRead(x); }
97  rstream &operator>>(Symbol &e) {
98  read((char *)&e, 18);
99  return *this;
100  }
101 };
102 
103 // string table in a .OBJ file
104 class StringTable {
105 private:
106  map<string, unsigned> directory;
107  size_t length;
108  char *data;
109 
110  // make <directory> from <length> bytes in <data>
111  void makeDirectory(void) {
112  unsigned i = 4;
113  while (i < length) {
114  string s = string(data + i);
115  directory.insert(make_pair(s, i));
116  i += s.size() + 1;
117  }
118  }
119  // initialize <length> and <data> with contents specified by the arguments
120  void init(const char *_data) {
121  unsigned _length = *(unsigned *)_data;
122 
123  if (_length < sizeof(unsigned) || _length != *(unsigned *)_data)
124  stop("StringTable.init: Invalid symbol table");
125  if (_data[_length - 1]) {
126  // to prevent runaway strings, make sure the data ends with a zero
127  data = new char[length = _length + 1];
128  data[_length] = 0;
129  } else {
130  data = new char[length = _length];
131  }
132  *(unsigned *)data = length;
133  KMP_MEMCPY(data + sizeof(unsigned), _data + sizeof(unsigned),
134  length - sizeof(unsigned));
135  makeDirectory();
136  }
137 
138 public:
139  StringTable(rstream &f) {
140  // Construct string table by reading from f.
141  streampos s;
142  unsigned strSize;
143  char *strData;
144 
145  s = f.tellg();
146  f >> strSize;
147  if (strSize < sizeof(unsigned))
148  stop("StringTable: Invalid string table");
149  strData = new char[strSize];
150  *(unsigned *)strData = strSize;
151  // read the raw data into <strData>
152  f.read(strData + sizeof(unsigned), strSize - sizeof(unsigned));
153  s = f.tellg() - s;
154  if (s < strSize)
155  stop("StringTable: Unexpected EOF");
156  init(strData);
157  delete[] strData;
158  }
159  StringTable(const set<string> &strings) {
160  // Construct string table from given strings.
161  char *p;
162  set<string>::const_iterator it;
163  size_t s;
164 
165  // count required size for data
166  for (length = sizeof(unsigned), it = strings.begin(); it != strings.end();
167  ++it) {
168  size_t l = (*it).size();
169 
170  if (l > (unsigned)0xFFFFFFFF)
171  stop("StringTable: String too long");
172  if (l > 8) {
173  length += l + 1;
174  if (length > (unsigned)0xFFFFFFFF)
175  stop("StringTable: Symbol table too long");
176  }
177  }
178  data = new char[length];
179  *(unsigned *)data = length;
180  // populate data and directory
181  for (p = data + sizeof(unsigned), it = strings.begin(); it != strings.end();
182  ++it) {
183  const string &str = *it;
184  size_t l = str.size();
185  if (l > 8) {
186  directory.insert(make_pair(str, p - data));
187  KMP_MEMCPY(p, str.c_str(), l);
188  p[l] = 0;
189  p += l + 1;
190  }
191  }
192  }
193  ~StringTable() { delete[] data; }
194  // Returns encoding for given string based on this string table. Error if
195  // string length is greater than 8 but string is not in the string table
196  // -- returns 0.
197  __int64 encode(const string &str) {
198  __int64 r;
199 
200  if (str.size() <= 8) {
201  // encoded directly
202  ((char *)&r)[7] = 0;
203  KMP_STRNCPY_S((char *)&r, sizeof(r), str.c_str(), 8);
204  return r;
205  } else {
206  // represented as index into table
207  map<string, unsigned>::const_iterator it = directory.find(str);
208  if (it == directory.end())
209  stop("StringTable::encode: String now found in string table");
210  ((unsigned *)&r)[0] = 0;
211  ((unsigned *)&r)[1] = (*it).second;
212  return r;
213  }
214  }
215  // Returns string represented by x based on this string table. Error if x
216  // references an invalid position in the table--returns the empty string.
217  string decode(__int64 x) const {
218  if (*(unsigned *)&x == 0) {
219  // represented as index into table
220  unsigned &p = ((unsigned *)&x)[1];
221  if (p >= length)
222  stop("StringTable::decode: Invalid string table lookup");
223  return string(data + p);
224  } else {
225  // encoded directly
226  char *p = (char *)&x;
227  int i;
228 
229  for (i = 0; i < 8 && p[i]; ++i)
230  ;
231  return string(p, i);
232  }
233  }
234  void write(ostream &os) { os.write(data, length); }
235 };
236 
237 // for the named object file, determines the set of defined symbols and the set
238 // of undefined external symbols and writes them to <defined> and <undefined>
239 // respectively
240 void computeExternalSymbols(const char *fileName, set<string> *defined,
241  set<string> *undefined) {
242  streampos fileSize;
243  size_t strTabStart;
244  unsigned symTabStart, symNEntries;
245  rstream f(fileName);
246 
247  f.seekg(0, ios::end);
248  fileSize = f.tellg();
249 
250  f.seekg(8);
251  f >> symTabStart >> symNEntries;
252  // seek to the string table
253  f.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries);
254  if (f.eof()) {
255  printf("computeExternalSymbols: fileName='%s', fileSize = %lu, symTabStart "
256  "= %u, symNEntries = %u\n",
257  fileName, (unsigned long)fileSize, symTabStart, symNEntries);
258  stop("computeExternalSymbols: Unexpected EOF 1");
259  }
260  StringTable stringTable(f); // read the string table
261  if (f.tellg() != fileSize)
262  stop("computeExternalSymbols: Unexpected data after string table");
263 
264  f.clear();
265  f.seekg(symTabStart); // seek to the symbol table
266 
267  defined->clear();
268  undefined->clear();
269  for (int i = 0; i < symNEntries; ++i) {
270  // process each entry
271  Symbol e;
272 
273  if (f.eof())
274  stop("computeExternalSymbols: Unexpected EOF 2");
275  f >> e;
276  if (f.fail())
277  stop("computeExternalSymbols: File read error");
278  if (e.nAux) { // auxiliary entry: skip
279  f.seekg(e.nAux * 18, ios::cur);
280  i += e.nAux;
281  }
282  // if symbol is extern and defined in the current file, insert it
283  if (e.storageClass == 2)
284  if (e.sectionNum)
285  defined->insert(stringTable.decode(e.name));
286  else
287  undefined->insert(stringTable.decode(e.name));
288  }
289 }
290 
291 // For each occurrence of an external symbol in the object file named by
292 // by <fileName> that is a member of <hide>, renames it by prefixing
293 // with "__kmp_external_", writing back the file in-place
294 void hideSymbols(char *fileName, const set<string> &hide) {
295  static const string prefix("__kmp_external_");
296  set<string> strings; // set of all occurring symbols, appropriately prefixed
297  streampos fileSize;
298  size_t strTabStart;
299  unsigned symTabStart, symNEntries;
300  int i;
301  rstream in(fileName);
302 
303  in.seekg(0, ios::end);
304  fileSize = in.tellg();
305 
306  in.seekg(8);
307  in >> symTabStart >> symNEntries;
308  in.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries);
309  if (in.eof())
310  stop("hideSymbols: Unexpected EOF");
311  StringTable stringTableOld(in); // read original string table
312 
313  if (in.tellg() != fileSize)
314  stop("hideSymbols: Unexpected data after string table");
315 
316  // compute set of occurring strings with prefix added
317  for (i = 0; i < symNEntries; ++i) {
318  Symbol e;
319 
320  in.seekg(symTabStart + i * 18);
321  if (in.eof())
322  stop("hideSymbols: Unexpected EOF");
323  in >> e;
324  if (in.fail())
325  stop("hideSymbols: File read error");
326  if (e.nAux)
327  i += e.nAux;
328  const string &s = stringTableOld.decode(e.name);
329  // if symbol is extern and found in <hide>, prefix and insert into strings,
330  // otherwise, just insert into strings without prefix
331  strings.insert(
332  (e.storageClass == 2 && hide.find(s) != hide.end()) ? prefix + s : s);
333  }
334 
335  ofstream out(fileName, ios::trunc | ios::out | ios::binary);
336  if (!out.is_open())
337  stop("hideSymbols: Error opening output file");
338 
339  // make new string table from string set
340  StringTable stringTableNew = StringTable(strings);
341 
342  // copy input file to output file up to just before the symbol table
343  in.seekg(0);
344  char *buf = new char[symTabStart];
345  in.read(buf, symTabStart);
346  out.write(buf, symTabStart);
347  delete[] buf;
348 
349  // copy input symbol table to output symbol table with name translation
350  for (i = 0; i < symNEntries; ++i) {
351  Symbol e;
352 
353  in.seekg(symTabStart + i * 18);
354  if (in.eof())
355  stop("hideSymbols: Unexpected EOF");
356  in >> e;
357  if (in.fail())
358  stop("hideSymbols: File read error");
359  const string &s = stringTableOld.decode(e.name);
360  out.seekp(symTabStart + i * 18);
361  e.name = stringTableNew.encode(
362  (e.storageClass == 2 && hide.find(s) != hide.end()) ? prefix + s : s);
363  out.write((char *)&e, 18);
364  if (out.fail())
365  stop("hideSymbols: File write error");
366  if (e.nAux) {
367  // copy auxiliary symbol table entries
368  int nAux = e.nAux;
369  for (int j = 1; j <= nAux; ++j) {
370  in >> e;
371  out.seekp(symTabStart + (i + j) * 18);
372  out.write((char *)&e, 18);
373  }
374  i += nAux;
375  }
376  }
377  // output string table
378  stringTableNew.write(out);
379 }
380 
// Returns true iff <a> and <b> have no common element.
// Both sets are traversed in ascending order at the same time: the iterator
// that refers to the smaller element is advanced until either an element
// that is in both sets is found or one of the sets is exhausted.
template <class T> bool isDisjoint(const std::set<T> &a, const std::set<T> &b) {
  // 'typename' is required: const_iterator is a name that depends on T
  typename std::set<T>::const_iterator ita = a.begin();
  typename std::set<T>::const_iterator itb = b.begin();

  while (ita != a.end() && itb != b.end()) {
    if (*ita < *itb)
      ++ita;
    else if (*itb < *ita)
      ++itb;
    else
      return false;
  }
  return true;
}
396 
397 // PRE: <defined> and <undefined> are arrays with <nTotal> elements where
398 // <nTotal> >= <nExternal>. The first <nExternal> elements correspond to the
399 // external object files and the rest correspond to the internal object files.
400 // POST: file x is said to depend on file y if undefined[x] and defined[y] are
401 // not disjoint. Returns the transitive closure of the set of internal object
402 // files, as a set of file indexes, under the 'depends on' relation, minus the
403 // set of internal object files.
404 set<int> *findRequiredExternal(int nExternal, int nTotal, set<string> *defined,
405  set<string> *undefined) {
406  set<int> *required = new set<int>;
407  set<int> fresh[2];
408  int i, cur = 0;
409  bool changed;
410 
411  for (i = nTotal - 1; i >= nExternal; --i)
412  fresh[cur].insert(i);
413  do {
414  changed = false;
415  for (set<int>::iterator it = fresh[cur].begin(); it != fresh[cur].end();
416  ++it) {
417  set<string> &s = undefined[*it];
418 
419  for (i = 0; i < nExternal; ++i) {
420  if (required->find(i) == required->end()) {
421  if (!isDisjoint(defined[i], s)) {
422  // found a new qualifying element
423  required->insert(i);
424  fresh[1 - cur].insert(i);
425  changed = true;
426  }
427  }
428  }
429  }
430  fresh[cur].clear();
431  cur = 1 - cur;
432  } while (changed);
433  return required;
434 }
435 
436 int main(int argc, char **argv) {
437  int nExternal, nInternal, i;
438  set<string> *defined, *undefined;
439  set<int>::iterator it;
440 
441  if (argc < 3)
442  stop("Please specify a positive integer followed by a list of object "
443  "filenames");
444  nExternal = atoi(argv[1]);
445  if (nExternal <= 0)
446  stop("Please specify a positive integer followed by a list of object "
447  "filenames");
448  if (nExternal + 2 > argc)
449  stop("Too few external objects");
450  nInternal = argc - nExternal - 2;
451  defined = new set<string>[argc - 2];
452  undefined = new set<string>[argc - 2];
453 
454  // determine the set of defined and undefined external symbols
455  for (i = 2; i < argc; ++i)
456  computeExternalSymbols(argv[i], defined + i - 2, undefined + i - 2);
457 
458  // determine the set of required external files
459  set<int> *requiredExternal =
460  findRequiredExternal(nExternal, argc - 2, defined, undefined);
461  set<string> hide;
462 
463  // determine the set of symbols to hide--namely defined external symbols of
464  // the required external files
465  for (it = requiredExternal->begin(); it != requiredExternal->end(); ++it) {
466  int idx = *it;
467  set<string>::iterator it2;
468  // We have to insert one element at a time instead of inserting a range
469  // because the insert member function taking a range doesn't exist on
470  // Windows* OS, at least at the time of this writing.
471  for (it2 = defined[idx].begin(); it2 != defined[idx].end(); ++it2)
472  hide.insert(*it2);
473  }
474 
475  // process the external files--removing those that are not required and hiding
476  // the appropriate symbols in the others
477  for (i = 0; i < nExternal; ++i)
478  if (requiredExternal->find(i) != requiredExternal->end())
479  hideSymbols(argv[2 + i], hide);
480  else
481  remove(argv[2 + i]);
482  // hide the appropriate symbols in the internal files
483  for (i = nExternal + 2; i < argc; ++i)
484  hideSymbols(argv[i], hide);
485  return 0;
486 }